rm(list=ls())
setwd("/hpc/group/pbenfeylab/CheWei/")
libs <- c("here", "dplyr", "tradeSeq", "SingleCellExperiment", "slingshot",
"condiments", "scater", "RColorBrewer", "pheatmap", "cowplot",
"tidyr","condimentsPaper","Seurat")
suppressMessages(
suppressWarnings(sapply(libs, require, character.only = TRUE))
)
suppressPackageStartupMessages({
library(slingshot); library(SingleCellExperiment)
library(RColorBrewer); library(scales)
library(viridis); library(UpSetR)
library(pheatmap); library(msigdbr)
library(fgsea); library(knitr)
library(ggplot2); library(gridExtra)
library(tradeSeq);library(Seurat)
library(tidyverse);library(condiments)
library(patchwork);library(ComplexHeatmap)
library(circlize);library(WGCNA)
library(tricycle);library(GeneOverlap)
library(gprofiler2);library(ggrepel)
})
sessionInfo()
R version 4.2.2 (2022-10-31) Platform: x86_64-conda-linux-gnu (64-bit) Running under: AlmaLinux 9.3 (Shamrock Pampas Cat) Matrix products: default BLAS/LAPACK: /hpc/group/pbenfeylab/ch416/miniconda3/envs/seu4/lib/libopenblasp-r0.3.21.so locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 [7] LC_PAPER=en_US.UTF-8 LC_NAME=C [9] LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C attached base packages: [1] grid stats4 stats graphics grDevices utils datasets [8] methods base other attached packages: [1] ggrepel_0.9.4 gprofiler2_0.2.2 [3] GeneOverlap_1.34.0 tricycle_1.6.0 [5] WGCNA_1.72-1 fastcluster_1.2.3 [7] dynamicTreeCut_1.63-1 circlize_0.4.15 [9] ComplexHeatmap_2.14.0 patchwork_1.1.3 [11] forcats_0.5.2 stringr_1.5.1 [13] purrr_1.0.2 readr_2.1.3 [15] tibble_3.2.1 tidyverse_1.3.2 [17] gridExtra_2.3 knitr_1.45 [19] fgsea_1.24.0 msigdbr_7.5.1 [21] UpSetR_1.4.0 viridis_0.6.4 [23] viridisLite_0.4.2 scales_1.2.1 [25] SeuratObject_4.1.3 Seurat_4.1.1.9001 [27] tidyr_1.3.0 cowplot_1.1.1 [29] pheatmap_1.0.12 RColorBrewer_1.1-3 [31] scater_1.26.1 ggplot2_3.4.4 [33] scuttle_1.8.0 condiments_1.6.0 [35] slingshot_2.6.0 TrajectoryUtils_1.6.0 [37] princurve_2.1.6 SingleCellExperiment_1.20.0 [39] SummarizedExperiment_1.28.0 Biobase_2.58.0 [41] GenomicRanges_1.50.0 GenomeInfoDb_1.34.8 [43] IRanges_2.32.0 S4Vectors_0.36.0 [45] BiocGenerics_0.44.0 MatrixGenerics_1.10.0 [47] matrixStats_1.1.0 tradeSeq_1.12.0 [49] dplyr_1.1.3 here_1.0.1 loaded via a namespace (and not attached): [1] rsvd_1.0.5 Hmisc_5.1-1 [3] ica_1.0-3 class_7.3-22 [5] foreach_1.5.2 lmtest_0.9-40 [7] rprojroot_2.0.4 crayon_1.5.2 [9] MASS_7.3-58.3 distinct_1.10.2 [11] nlme_3.1-162 backports_1.4.1 [13] reprex_2.0.2 transport_0.13-0 [15] impute_1.72.0 rlang_1.1.2 [17] XVector_0.38.0 caret_6.0-94 [19] ROCR_1.0-11 readxl_1.4.1 [21] irlba_2.3.5.1 limma_3.54.0 [23] BiocParallel_1.32.5 rjson_0.2.21 [25] bit64_4.0.5 glue_1.6.2 [27] rngtools_1.5.2 sctransform_0.4.1 [29] parallel_4.2.2 vipor_0.4.5 [31] spatstat.sparse_3.0-3 AnnotationDbi_1.60.0 [33] spatstat.geom_3.2-7 haven_2.5.1 [35] tidyselect_1.2.0 fitdistrplus_1.1-11 [37] zoo_1.8-12 xtable_1.8-4 [39] RcppHNSW_0.5.0 magrittr_2.0.3 [41] evaluate_0.23 cli_3.6.1 [43] zlibbioc_1.44.0 rstudioapi_0.15.0 [45] doRNG_1.8.6 miniUI_0.1.1.1 [47] sp_2.1-1 spatstat.linnet_3.1-0 [49] rpart_4.1.19 fastmatch_1.1-3 [51] fastDummies_1.7.3 shiny_1.7.5.1 [53] BiocSingular_1.14.0 xfun_0.41 [55] spatstat.model_3.2-3 clue_0.3-64 [57] cluster_2.1.4 caTools_1.18.2 [59] pbdZMQ_0.3-8 KEGGREST_1.38.0 [61] listenv_0.9.0 Biostrings_2.66.0 [63] png_0.1-8 future_1.33.0 [65] ipred_0.9-14 withr_2.5.2 [67] bitops_1.0-7 plyr_1.8.9 [69] cellranger_1.1.0 hardhat_1.3.0 [71] e1071_1.7-13 pROC_1.18.0 [73] pillar_1.9.0 gplots_3.1.3 [75] GlobalOptions_0.1.2 cachem_1.0.8 [77] Ecume_0.9.1 fs_1.6.3 [79] kernlab_0.9-32 GetoptLong_1.0.5 [81] DelayedMatrixStats_1.20.0 vctrs_0.6.4 [83] ellipsis_0.3.2 generics_0.1.3 [85] lava_1.7.2.1 tools_4.2.2 [87] foreign_0.8-85 beeswarm_0.4.0 [89] munsell_0.5.0 proxy_0.4-27 [91] DelayedArray_0.24.0 fastmap_1.1.1 [93] compiler_4.2.2 abind_1.4-5 [95] httpuv_1.6.12 plotly_4.10.3 [97] GenomeInfoDbData_1.2.9 prodlim_2023.03.31 [99] edgeR_3.40.1 ggnewscale_0.4.8 [101] lattice_0.21-8 deldir_1.0-9 [103] utf8_1.2.4 later_1.3.1 [105] recipes_1.0.6 jsonlite_1.8.7 [107] ScaledMatrix_1.6.0 pbapply_1.7-2 [109] sparseMatrixStats_1.10.0 lazyeval_0.2.2 [111] promises_1.2.1 spatstat_3.0-5 [113] doParallel_1.0.17 goftest_1.2-3 [115] checkmate_2.3.0 spatstat.utils_3.0-4 [117] reticulate_1.34.0 rmarkdown_2.25 [119] Rtsne_0.16 uwot_0.1.16 [121] igraph_1.5.1 survival_3.4-0 [123] htmltools_0.5.7 memoise_2.0.1 [125] locfit_1.5-9.6 digest_0.6.33 [127] assertthat_0.2.1 mime_0.12 [129] repr_1.1.4 RSQLite_2.3.1 [131] future.apply_1.11.0 data.table_1.14.8 [133] blob_1.2.4 preprocessCore_1.60.2 [135] Formula_1.2-5 splines_4.2.2 [137] googledrive_2.0.0 RCurl_1.98-1.6 [139] broom_1.0.2 hms_1.1.2 [141] modelr_0.1.10 colorspace_2.1-0 [143] base64enc_0.1-3 ggbeeswarm_0.7.1 [145] shape_1.4.6 nnet_7.3-19 [147] Rcpp_1.0.11 RANN_2.6.1 [149] mvtnorm_1.1-3 fansi_1.0.5 [151] tzdb_0.3.0 parallelly_1.36.0 [153] IRdisplay_1.1 ModelMetrics_1.2.2.2 [155] R6_2.5.1 ggridges_0.5.4 [157] lifecycle_1.0.4 googlesheets4_1.0.1 [159] leiden_0.4.3 Matrix_1.5-4 [161] RcppAnnoy_0.0.21 iterators_1.0.14 [163] spatstat.explore_3.2-5 gower_1.0.1 [165] htmlwidgets_1.6.2 beachmat_2.14.0 [167] polyclip_1.10-6 timechange_0.1.1 [169] circular_0.4-95 rvest_1.0.3 [171] mgcv_1.8-42 globals_0.16.2 [173] htmlTable_2.4.2 spatstat.random_3.2-1 [175] progressr_0.14.0 codetools_0.2-19 [177] lubridate_1.9.0 GO.db_3.16.0 [179] gtools_3.9.4 dbplyr_2.2.1 [181] RSpectra_0.16-1 gtable_0.3.4 [183] DBI_1.1.3 tensor_1.5 [185] httr_1.4.7 KernSmooth_2.23-20 [187] stringi_1.8.1 reshape2_1.4.4 [189] uuid_1.1-0 timeDate_4022.108 [191] xml2_1.3.3 boot_1.3-28.1 [193] IRkernel_1.3.1.9000 BiocNeighbors_1.16.0 [195] scattermore_1.2 bit_4.0.5 [197] spatstat.data_3.0-3 pkgconfig_2.0.3 [199] babelgene_22.9 gargle_1.2.1
rc.integrated <- readRDS("./tricycle/phloem_atlas_seu4_simplified.rds")
head(rc.integrated@meta.data)
| orig.ident | nCount_RNA | nFeature_RNA | cell_id | sample | barcode | total_counts | detected_genes | cluster | annotation | ... | prediction.score.T40 | prediction.score.T28 | prediction.score.T11 | prediction.score.T39 | prediction.score.T45 | prediction.score.T31 | prediction.score.T20 | consensus.time.group.50 | time.anno.Li.crude | celltype.anno.Li.crude | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <fct> | <dbl> | <int> | <chr> | <chr> | <chr> | <int> | <int> | <int> | <chr> | ... | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <chr> | <chr> | <chr> | |
| APL_AAACCCAAGTAACGTA-1 | APL | 35383 | 5176 | APL_AAACCCAAGTAACGTA-1 | APL | AAACCCAAGTAACGTA-1 | 35600 | 5210 | 9 | early/undetermined | ... | 0.000000000 | 0.00000000 | 0.00000000 | 0.000000 | 0 | 0.000000000 | 0.01163433 | T0 | Proliferation Domain | Endodermis |
| APL_AAACCCACAACCAACT-1 | APL | 6456 | 3020 | APL_AAACCCACAACCAACT-1 | APL | AAACCCACAACCAACT-1 | 6616 | 3058 | 2 | PSE | ... | 0.000000000 | 0.00000000 | 0.04052317 | 0.000000 | 0 | 0.000000000 | 0.00000000 | T23 | Proliferation Domain | Phloem |
| APL_AAACCCACAGTTGAAA-1 | APL | 12731 | 3650 | APL_AAACCCACAGTTGAAA-1 | APL | AAACCCACAGTTGAAA-1 | 12781 | 3669 | 1 | MSE | ... | 0.000000000 | 0.00000000 | 0.00000000 | 0.000000 | 0 | 0.003022812 | 0.02580615 | T21 | Proliferation Domain | Phloem |
| APL_AAACCCAGTGCCTTTC-1 | APL | 6954 | 3072 | APL_AAACCCAGTGCCTTTC-1 | APL | AAACCCAGTGCCTTTC-1 | 7138 | 3100 | 4 | PPP | ... | 0.000000000 | 0.00000000 | 0.01507838 | 0.000000 | 0 | 0.000000000 | 0.14251363 | T18 | Elongation | Pericycle |
| APL_AAACCCAGTTTCGACA-1 | APL | 7276 | 2483 | APL_AAACCCAGTTTCGACA-1 | APL | AAACCCAGTTTCGACA-1 | 7364 | 2529 | 6 | PSE | ... | 0.004908478 | 0.00000000 | 0.00000000 | 0.139904 | 0 | 0.005391760 | 0.00000000 | T33 | Maturation | Phloem |
| APL_AAACCCATCTGCGAGC-1 | APL | 7045 | 2849 | APL_AAACCCATCTGCGAGC-1 | APL | AAACCCATCTGCGAGC-1 | 7114 | 2869 | 3 | CC | ... | 0.000000000 | 0.08276203 | 0.00000000 | 0.000000 | 0 | 0.007452301 | 0.00000000 | T27 | Proliferation Domain | Phloem |
table(rc.integrated$orig.ident)
APL MAKR5 MAKR5diff PEARdel S17 sAPL
5360 542 2022 649 1461 170
DimPlot(rc.integrated, reduction = "umap", group.by = "annotation")
DimPlot(rc.integrated, reduction = "umap", group.by = "celltype.anno.Li.crude", label=TRUE)
DimPlot(rc.integrated, reduction = "umap", group.by = "time.anno.Li.crude", label=TRUE)
rc.integrated
An object of class Seurat 17396 features across 10204 samples within 1 assay Active assay: RNA (17396 features, 17396 variable features) 2 dimensional reductions calculated: pca, umap
rc.integrated <- subset(rc.integrated, cells=colnames(rc.integrated)[which(rc.integrated$annotation=="dividing" | rc.integrated$annotation=="early PSE" | rc.integrated$annotation=="early/undetermined"| rc.integrated$annotation=="PSE"| rc.integrated$annotation=="MSE" | rc.integrated$annotation=="CC")])
rc.integrated <- subset(rc.integrated, cells=colnames(rc.integrated)[which(rc.integrated$time.anno.Li.crude=="Proliferation Domain")])
rc.integrated
An object of class Seurat 17396 features across 3645 samples within 1 assay Active assay: RNA (17396 features, 17396 variable features) 2 dimensional reductions calculated: pca, umap
DimPlot(rc.integrated, reduction = "umap", group.by = "annotation")
wanted_cols <- c("orig.ident", "annotation", "tricyclePosition","tricycleCCStage")
rc.integrated@meta.data <- rc.integrated@meta.data[,wanted_cols]
colnames(rc.integrated@meta.data) <- c("sample", "annotation","tricyclePosition","tricycleCCStage")
table(rc.integrated$sample)
APL MAKR5 MAKR5diff PEARdel S17 sAPL
1244 389 1563 357 43 49
ccgl <- read.csv("./tradeseq/245_cell_cycle_related_genes_for_reference.csv")
ccgl <- ccgl$GeneID
options(repr.plot.width=6, repr.plot.height=6)
DimPlot(rc.integrated, reduction = "umap", group.by = "tricycleCCStage", label = FALSE, pt.size=2)
table(rc.integrated$tricycleCCStage)
G1/G0 G2M S 1177 1129 1339
options(repr.plot.width=6, repr.plot.height=6)
DimPlot(rc.integrated, reduction = "pca", group.by = "tricycleCCStage", label = FALSE, pt.size=2)
rc.integrated <- RunPCA(rc.integrated,features = ccgl)
rc.integrated <- FindNeighbors(rc.integrated, dims = 1:10)
rc.integrated <- RunUMAP(rc.integrated, dims = 1:10)
Warning message in PrepDR(object = object, features = features, verbose = verbose): “The following 3 features requested have not been scaled (running reduction without them): AT5G47500, AT1G33670, AT5G05900” PC_ 1 Positive: AT5G55520, AT4G05190, AT1G16630, AT5G51600, AT4G14330, AT1G76310, AT4G26660, AT1G02730, AT4G01730, AT4G38062 AT3G11520, AT1G03780, AT3G56100, AT3G15550, AT4G09060, AT1G20610, AT2G22610, AT5G60930, AT3G44050, AT1G18370 AT3G23670, AT5G45700, AT4G35620, AT4G37490, AT3G55660, AT5G15510, AT5G56120, AT3G58650, AT3G26050, AT1G34355 Negative: AT3G12170, AT5G52220, AT2G20980, AT3G48490, AT1G80190, AT2G44580, AT4G00020, AT2G29680, AT3G23740, AT3G52115 AT4G30860, AT4G31400, AT2G24970, AT1G75150, AT1G26330, AT3G13060, AT4G12620, AT2G37560, AT3G24495, AT1G20720 AT3G27640, AT4G21070, AT3G62110, AT3G05740, AT3G59550, AT3G25100, AT3G18730, AT2G42190, AT1G07270, AT5G46740 PC_ 2 Positive: AT1G07020, AT3G62110, AT5G56120, AT3G13060, AT5G15510, AT2G42190, AT4G02800, AT4G35620, AT3G03810, AT3G51230 AT4G34960, AT4G17000, AT5G13840, AT3G15550, AT1G02730, AT1G03780, AT3G11520, AT1G11220, AT4G38230, AT3G55660 AT3G58100, AT5G01420, AT4G26660, AT5G48310, AT1G05520, AT4G19185, AT5G45700, AT1G76310, AT5G44040, AT1G18370 Negative: AT3G12170, AT4G30860, AT4G00020, AT2G20980, AT2G44580, AT5G43080, AT1G69770, AT1G29418, AT3G48490, AT5G46740 AT1G26330, AT3G24495, AT5G52220, AT1G80190, AT3G48160, AT3G55340, AT1G75150, AT1G77630, AT3G27640, AT3G52115 AT3G23740, AT3G59550, AT4G31400, AT1G20720, AT5G02820, AT4G12620, AT5G13830, AT3G18730, AT4G21070, AT2G37560 PC_ 3 Positive: AT5G56120, AT1G63100, AT3G54710, AT4G02800, AT3G51230, AT5G15510, AT5G44040, AT1G04425, AT3G55660, AT1G03780 AT5G48310, AT1G18370, AT3G15550, AT1G23790, AT5G01420, AT5G13840, AT1G75640, AT4G17000, AT4G35620, AT1G11220 AT1G02730, AT3G11520, AT5G45700, AT4G26660, AT3G12170, AT2G44190, AT3G48490, AT5G55830, AT2G44580, AT4G37490 Negative: AT1G72670, AT1G44110, AT1G10780, AT1G53140, AT1G76740, AT2G47920, AT2G36200, AT4G21270, AT5G55820, AT4G18570 AT1G61450, AT3G19050, AT4G11080, AT4G13370, AT5G25380, AT1G49910, AT2G42290, AT4G21820, AT4G17240, AT5G60150 AT5G60930, AT4G39630, AT1G59540, AT5G11510, AT4G12540, AT2G32590, AT5G07810, AT5G37630, AT3G58650, AT4G14150 PC_ 4 Positive: AT3G55340, AT3G06840, AT1G04425, AT5G13830, AT4G34360, AT1G77550, AT3G54710, AT1G29418, AT3G13150, AT5G42700 AT1G30960, AT4G15850, AT1G77630, AT1G62150, AT1G63100, AT1G75640, AT1G53542, AT2G40570, AT2G17670, AT5G12440 AT4G16700, AT5G59240, AT1G09700, AT4G12740, AT5G09380, AT2G04530, AT3G46020, AT4G28020, AT1G73180, AT1G19520 Negative: AT3G52115, AT3G27640, AT5G43080, AT3G48490, AT4G30860, AT3G25100, AT4G21070, AT2G29680, AT5G46740, AT1G75150 AT4G00020, AT2G42260, AT3G48160, AT2G20980, AT1G76740, AT4G31400, AT3G24495, AT5G20850, AT5G25380, AT1G05440 AT3G12170, AT1G07270, AT4G01730, AT2G24970, AT4G35620, AT1G20610, AT5G23910, AT1G80190, AT3G18680, AT1G20720 PC_ 5 Positive: AT4G22970, AT5G37630, AT1G69770, AT3G54750, AT4G15890, AT1G29418, AT5G07810, AT3G13060, AT5G63920, AT1G50240 AT3G60740, AT2G32590, AT1G05520, AT1G20410, AT3G19050, AT1G72480, AT1G75640, AT1G63100, AT4G12540, AT2G44190 AT4G01810, AT5G63120, AT1G18370, AT3G24495, AT5G51560, AT3G13150, AT5G13840, AT2G33610, AT5G47400, AT1G09280 Negative: AT3G46020, AT3G06840, AT4G34360, AT5G13830, AT3G12170, AT1G61450, AT5G14660, AT3G48490, AT5G52220, AT2G47920 AT5G42700, AT2G34450, AT2G24970, AT1G53542, AT5G06590, AT4G39630, AT5G63690, AT5G09500, AT4G04190, AT5G12360 AT4G11080, AT2G44580, AT1G80190, AT1G44110, AT2G20980, AT5G45700, AT4G05190, AT4G01730, AT4G09060, AT3G26050 Computing nearest neighbor graph Computing SNN Warning message: “The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation' This message will be shown once per session” 14:33:22 UMAP embedding parameters a = 0.9922 b = 1.112 14:33:22 Read 3645 rows and found 10 numeric columns 14:33:22 Using Annoy for neighbor search, n_neighbors = 30 14:33:22 Building Annoy index with metric = cosine, n_trees = 50 0% 10 20 30 40 50 60 70 80 90 100% [----|----|----|----|----|----|----|----|----|----| * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * | 14:33:23 Writing NN index file to temp file /tmp/Rtmp21YxYc/file3782c157914d8 14:33:23 Searching Annoy index using 1 thread, search_k = 3000 14:33:24 Annoy recall = 100% 14:33:25 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30 14:33:28 Initializing from normalized Laplacian + noise (using RSpectra) 14:33:29 Commencing optimization for 500 epochs, with 146446 positive edges 14:33:34 Optimization finished
#rc.integrated[["umap"]]@cell.embeddings[,1] <- rc.integrated[["umap"]]@cell.embeddings[,1]*-1
rc.integrated[["umap"]]@cell.embeddings[,2] <- rc.integrated[["umap"]]@cell.embeddings[,2]*-1
#u2 <- rc.integrated@reductions$umap@cell.embeddings[,1]
#u1 <- rc.integrated@reductions$umap@cell.embeddings[,2]
#rc.integrated@reductions$umap@cell.embeddings[,1] <- u1
#rc.integrated@reductions$umap@cell.embeddings[,2] <- u2
options(repr.plot.width=6, repr.plot.height=6)
DimPlot(rc.integrated, reduction = "umap", group.by = "tricycleCCStage", label = FALSE, pt.size=2)
grDevices::cairo_pdf("./pdfs/Phloem_atlas_Proliferation_Domain_cells_UMAP_with_245_CCgenes.pdf",width=6, height=6)
print(DimPlot(rc.integrated, reduction = "umap", group.by = "tricycleCCStage", label = FALSE, pt.size=2))
dev.off()
sce <- as.SingleCellExperiment(rc.integrated)
sce$tricyclePosition <- rc.integrated$tricyclePosition*pi
options(repr.plot.width=9, repr.plot.height=6)
## Tricycle position on umap
p <- plot_emb_circle_scale(sce, dimred = "UMAP",
point.size = 5, point.alpha = 1) +
theme_void(base_size = 14)
legend <- circle_scale_legend(text.size = 5, alpha = 1)
print(plot_grid(p, legend, ncol = 2, rel_widths = c(1, 0.7)))
grDevices::cairo_pdf("./pdfs/Phloem_atlas_Proliferation_Domain_cells_UMAP_tricyclePosition_with_245_CCgenes.pdf",width=9, height=6)
print(plot_grid(p, legend, ncol = 2, rel_widths = c(1, 0.7)))
dev.off()
options(repr.plot.width=6, repr.plot.height=6)
DimPlot(rc.integrated, reduction = "pca", group.by = "tricycleCCStage", label = FALSE, pt.size=2)
pltsg <- function(gene){
keygene.idx <- which(rownames(rc.integrated@assays$RNA@data) == gene)
fit.l <- fit_periodic_loess(rc.integrated$tricyclePosition*pi,
rc.integrated@assays$RNA@data[keygene.idx,],
plot = FALSE, span=0.3,
x_lab = "Cell cycle position \u03b8", y_lab = "log2(keygene)")
fit <- fit.l$pred.df
options(repr.plot.width=8, repr.plot.height=6)
print(ggplot(fit, aes(x=x,y=y)) + geom_path(size = 1, alpha = 1)+ labs(x = "θ", y = "smoothed gene expression", title = gene) +
scale_x_continuous(limits = c(0, 2 * pi), breaks = c(0,
pi/2, pi, 3 * pi/2, 2 * pi), labels = paste0(c(0,
0.5, 1, 1.5, 2), "π"), name = "θ")+
# scale_x_continuous(limits = c(0, 200), breaks = c(0,
# 50, 100, 150, 200), labels = paste0(c(0,
# 0.5, 1, 1.5, 2), "π"), name = "θ")+
theme_bw(base_size = 14)+
theme(legend.title=element_blank()))
}
## DWF4
grDevices::cairo_pdf("Phloem_atlas_DWF4.pdf",width=8, height=6)
pltsg('AT3G50660')
dev.off()
pltsg('AT3G50660')
Warning message:
“Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.”
## OPS
grDevices::cairo_pdf("Phloem_atlas_OPS.pdf",width=8, height=6)
pltsg('AT3G09070')
dev.off()
pltsg('AT3G09070')
## OPL2
grDevices::cairo_pdf("Phloem_atlas_OPL2.pdf",width=8, height=6)
pltsg('AT2G38070')
dev.off()
pltsg('AT2G38070')
## G1 marker reported by Laura Lee
pltsg('AT5G21940')
## S marker reported by Laura Lee
pltsg('AT5G10390')
## G2M marker reported by Laura Lee
pltsg('AT4G23800')
DefaultAssay(rc.integrated) <- "RNA"
nrow(rc.integrated)
GL <- read.csv("./tradeseq/BES1_BZR1_2X_ChIP_targets.csv")
GL <- intersect(GL$GeneID,rownames(rc.integrated))
GL2 <- read.csv("./tradeseq/BES1_BZR1_target_BL2hr_vs_BRZ.csv")
GL2up <- GL2 %>% filter(BL2hr_vs_BRZ=="BES1_Target_BL_up")
GL2up <- intersect(GL2up$GeneID, rownames(rc.integrated))
GL2down <- GL2 %>% filter(BL2hr_vs_BRZ=="BES1_Target_BL_down")
GL2down <- intersect(GL2down$GeneID, rownames(rc.integrated))
GL2mixed <- GL2 %>% filter(BL2hr_vs_BRZ=="BES1_Target_BL_mixed")
GL2mixed <- intersect(GL2mixed$GeneID, rownames(rc.integrated))
GL3 <- read.csv("./tradeseq/MYB3R4_Targets.csv")
GL3 <- intersect(GL3$GeneID, rownames(rc.integrated))
DE <- read.csv("./tradeseq/v4_BL2hr_v_BRZ_cell_time_EdgeR_q0.05_FC1.5_r_v_4_20220112.csv", row.names = 1)
DE <- DE %>% filter(cluster_id == "Proliferation Domain_Atrichoblast")
head(DE)
| gene | cluster_id | sc_2.cpm | sc_43.cpm | sc_50.cpm | sc_5.cpm | sc_46.cpm | sc_49.cpm | sc_2.frq | sc_43.frq | ... | F | p_val | p_adj.loc | p_adj.glb | contrast | Name | TF_Name | Description | up_dn_label | clust_up_dn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | ... | <dbl> | <dbl> | <dbl> | <dbl> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | |
| 1 | AT3G50660 | Proliferation Domain_Atrichoblast | 66.5 | 53.90 | 58.10 | 3.04 | 1.22 | 1.31 | 0.7780 | 0.6530 | ... | 603 | 1.14e-26 | 2.56e-22 | 7.02e-21 | BL-BRZ | CYP90B1 | NA | NA | Down | Proliferation Domain_Atrichoblast_Down |
| 2 | AT4G36780 | Proliferation Domain_Atrichoblast | 227.0 | 167.00 | 196.00 | 17.90 | 12.50 | 12.70 | 0.9490 | 0.8930 | ... | 551 | 6.89e-26 | 7.70e-22 | 4.22e-20 | BL-BRZ | BEH2 | BEH2 | BES1/BZR1 homolog 2 | Down | Proliferation Domain_Atrichoblast_Down |
| 3 | AT4G01630 | Proliferation Domain_Atrichoblast | 12.0 | 6.87 | 11.50 | 209.00 | 142.00 | 142.00 | 0.2530 | 0.1480 | ... | 508 | 3.38e-25 | 2.52e-21 | 2.07e-19 | BL-BRZ | EXPA17 | NA | NA | Up | Proliferation Domain_Atrichoblast_Up |
| 4 | AT2G43890 | Proliferation Domain_Atrichoblast | 2.4 | 1.77 | 2.28 | 110.00 | 31.40 | 41.30 | 0.0606 | 0.0473 | ... | 454 | 2.98e-24 | 1.67e-20 | 1.83e-18 | BL-BRZ | AT2G43890 | NA | NA | Up | Proliferation Domain_Atrichoblast_Up |
| 5 | AT4G16780 | Proliferation Domain_Atrichoblast | 54.0 | 57.00 | 56.20 | 3.99 | 4.29 | 5.76 | 0.6870 | 0.7080 | ... | 406 | 2.55e-23 | 1.14e-19 | 1.56e-17 | BL-BRZ | HAT4 | HB-2 | homeobox protein 2 | Down | Proliferation Domain_Atrichoblast_Down |
| 6 | AT1G07985 | Proliferation Domain_Atrichoblast | 162.0 | 176.00 | 228.00 | 23.00 | 21.80 | 16.40 | 0.8990 | 0.7460 | ... | 363 | 2.25e-22 | 8.40e-19 | 1.38e-16 | BL-BRZ | AT1G07985 | NA | NA | Down | Proliferation Domain_Atrichoblast_Down |
rc.integrated
An object of class Seurat 17396 features across 3645 samples within 1 assay Active assay: RNA (17396 features, 17396 variable features) 2 dimensional reductions calculated: pca, umap
table(rc.integrated$sample)
APL MAKR5 MAKR5diff PEARdel S17 sAPL
1244 389 1563 357 43 49
y.list <- c()
for (gene in rownames(rc.integrated)){
keygene.idx <- which(rownames(rc.integrated) == gene)
fit.l <- fit_periodic_loess(rc.integrated$tricyclePosition*pi,
rc.integrated@assays$RNA@data[keygene.idx,],
plot = FALSE, span=0.3,
x_lab = "Cell cycle position \u03b8", y_lab = "log2(keygene)")
fit <- fit.l$pred.df
y.list <- cbind(y.list,fit$y)
}
colnames(y.list) <- rownames(rc.integrated)
write.csv(y.list, "./tradeseq/All_genes_Phloem_atlas_cells_tricycle.csv")
y.list <- read.csv("./tradeseq/All_genes_Phloem_atlas_cells_tricycle.csv", row.names=1)
y.list <- as.matrix(y.list)
head(y.list)
| AT1G01010 | AT1G01020 | AT1G01030 | AT1G01040 | AT1G01050 | AT1G01060 | AT1G01070 | AT1G01080 | AT1G01090 | AT1G01100 | ... | AT5G67540 | AT5G67560 | AT5G67570 | AT5G67580 | AT5G67590 | AT5G67600 | AT5G67610 | AT5G67620 | AT5G67630 | AT5G67640 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0.002851638 | 0.2683734 | 0.003660250 | 0.04952332 | 1.143598 | 0.003388342 | 0.003808542 | 0.008276280 | 0.5827933 | 3.244628 | ... | 0.09487781 | 0.3355283 | 0.05438583 | 0.06066394 | 0.9769679 | 1.477855 | 0.06591079 | 0.0004477556 | 0.1659255 | 0.07384151 |
| 2 | 0.002795392 | 0.2686408 | 0.003610621 | 0.04977569 | 1.151058 | 0.003366945 | 0.003894708 | 0.008181032 | 0.5820764 | 3.241383 | ... | 0.09600374 | 0.3379792 | 0.05449268 | 0.06167051 | 0.9805964 | 1.487969 | 0.06539267 | 0.0004734249 | 0.1629890 | 0.07338066 |
| 3 | 0.002740628 | 0.2689324 | 0.003559951 | 0.05004624 | 1.158525 | 0.003351505 | 0.003980497 | 0.008088692 | 0.5813535 | 3.238083 | ... | 0.09714855 | 0.3404688 | 0.05461508 | 0.06271770 | 0.9841949 | 1.498229 | 0.06488493 | 0.0005001382 | 0.1600546 | 0.07292797 |
| 4 | 0.002687371 | 0.2692476 | 0.003508313 | 0.05033416 | 1.165993 | 0.003341862 | 0.004065793 | 0.007999323 | 0.5806256 | 3.234734 | ... | 0.09831048 | 0.3429935 | 0.05475236 | 0.06380337 | 0.9877611 | 1.508621 | 0.06438769 | 0.0005278472 | 0.1571250 | 0.07248405 |
| 5 | 0.002635642 | 0.2695857 | 0.003455783 | 0.05063863 | 1.173452 | 0.003337858 | 0.004150478 | 0.007912987 | 0.5798939 | 3.231341 | ... | 0.09948777 | 0.3455497 | 0.05490387 | 0.06492541 | 0.9912926 | 1.519131 | 0.06390105 | 0.0005565033 | 0.1542029 | 0.07204952 |
| 6 | 0.002585466 | 0.2699458 | 0.003402434 | 0.05095885 | 1.180896 | 0.003339333 | 0.004234435 | 0.007829746 | 0.5791595 | 3.227909 | ... | 0.10067866 | 0.3481336 | 0.05506895 | 0.06608167 | 0.9947872 | 1.529747 | 0.06342511 | 0.0005860584 | 0.1512911 | 0.07162501 |
nrow(y.list)
## Remove zeros in y.list()
y.list <- y.list[,-as.numeric(which(colSums(y.list)==0))]
ncol(y.list)
allowWGCNAThreads()
Allowing multi-threading with up to 94 threads.
# Choose a set of soft-thresholding powers
powers = c(c(1:100))
# Call the network topology analysis function
sft = pickSoftThreshold(
y.list, # <= Input data
#blockSize = 30,
powerVector = powers,
verbose = 5
)
pickSoftThreshold: will use block size 2573.
pickSoftThreshold: calculating connectivity for given powers...
..working on genes 1 through 2573 of 17386
..working on genes 2574 through 5146 of 17386
..working on genes 5147 through 7719 of 17386
..working on genes 7720 through 10292 of 17386
..working on genes 10293 through 12865 of 17386
..working on genes 12866 through 15438 of 17386
..working on genes 15439 through 17386 of 17386
Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
1 1 8.96e-01 4.250000 0.9000 10900 11900 13100
2 2 8.51e-01 2.070000 0.8600 8360 9370 11200
3 3 7.54e-01 1.260000 0.7090 6880 7750 10000
4 4 6.52e-01 0.732000 0.5550 5890 6600 9140
5 5 5.30e-01 0.486000 0.4280 5170 5720 8450
6 6 3.67e-01 0.318000 0.2970 4620 5040 7880
7 7 2.14e-01 0.208000 0.1800 4170 4480 7400
8 8 9.17e-02 0.121000 0.1200 3810 4030 6990
9 9 2.33e-02 0.055500 0.0888 3510 3650 6630
10 10 2.92e-07 0.000188 0.0838 3250 3330 6310
11 11 1.85e-02 -0.046300 0.1280 3030 3050 6030
12 12 6.97e-02 -0.088900 0.1980 2840 2810 5770
13 13 1.35e-01 -0.124000 0.2710 2670 2600 5540
14 14 2.08e-01 -0.155000 0.3450 2520 2410 5330
15 15 2.78e-01 -0.180000 0.4040 2380 2250 5130
16 16 3.60e-01 -0.209000 0.4890 2260 2110 4950
17 17 4.24e-01 -0.232000 0.5500 2150 1980 4790
18 18 4.90e-01 -0.255000 0.6100 2050 1860 4630
19 19 5.46e-01 -0.276000 0.6610 1960 1760 4490
20 20 5.91e-01 -0.294000 0.6970 1870 1660 4350
21 21 6.34e-01 -0.311000 0.7400 1790 1580 4220
22 22 6.75e-01 -0.327000 0.7790 1720 1500 4100
23 23 7.05e-01 -0.343000 0.8090 1660 1430 3990
24 24 7.34e-01 -0.357000 0.8340 1600 1360 3890
25 25 7.58e-01 -0.370000 0.8540 1540 1300 3780
26 26 7.77e-01 -0.382000 0.8660 1480 1240 3690
27 27 7.99e-01 -0.394000 0.8800 1430 1190 3600
28 28 8.14e-01 -0.404000 0.8910 1390 1140 3510
29 29 8.25e-01 -0.415000 0.8990 1340 1090 3430
30 30 8.39e-01 -0.425000 0.9060 1300 1050 3350
31 31 8.51e-01 -0.434000 0.9160 1260 1010 3280
32 32 8.63e-01 -0.443000 0.9230 1220 974 3210
33 33 8.70e-01 -0.452000 0.9270 1190 940 3140
34 34 8.82e-01 -0.460000 0.9340 1160 907 3080
35 35 8.91e-01 -0.468000 0.9410 1120 876 3010
36 36 8.96e-01 -0.475000 0.9430 1090 846 2950
37 37 8.99e-01 -0.483000 0.9450 1070 819 2900
38 38 9.04e-01 -0.490000 0.9480 1040 793 2840
39 39 9.08e-01 -0.497000 0.9510 1010 769 2790
40 40 9.13e-01 -0.503000 0.9530 988 746 2740
41 41 9.16e-01 -0.509000 0.9560 965 724 2690
42 42 9.21e-01 -0.516000 0.9590 942 702 2640
43 43 9.24e-01 -0.521000 0.9610 920 682 2590
44 44 9.26e-01 -0.526000 0.9620 899 663 2550
45 45 9.30e-01 -0.532000 0.9630 879 644 2500
46 46 9.33e-01 -0.537000 0.9650 860 627 2460
47 47 9.35e-01 -0.541000 0.9680 842 611 2420
48 48 9.38e-01 -0.545000 0.9710 824 596 2380
49 49 9.41e-01 -0.550000 0.9730 807 581 2350
50 50 9.42e-01 -0.554000 0.9750 791 566 2310
51 51 9.44e-01 -0.560000 0.9740 775 550 2270
52 52 9.45e-01 -0.565000 0.9740 760 537 2240
53 53 9.47e-01 -0.569000 0.9750 745 523 2210
54 54 9.48e-01 -0.573000 0.9760 731 511 2180
55 55 9.49e-01 -0.577000 0.9760 717 499 2140
56 56 9.51e-01 -0.580000 0.9760 704 488 2110
57 57 9.53e-01 -0.584000 0.9760 691 477 2080
58 58 9.55e-01 -0.587000 0.9770 679 466 2060
59 59 9.56e-01 -0.591000 0.9760 667 456 2030
60 60 9.57e-01 -0.595000 0.9760 656 446 2000
61 61 9.57e-01 -0.598000 0.9760 644 436 1970
62 62 9.57e-01 -0.601000 0.9760 633 427 1950
63 63 9.58e-01 -0.605000 0.9760 623 418 1920
64 64 9.58e-01 -0.609000 0.9750 613 409 1900
65 65 9.58e-01 -0.612000 0.9740 603 401 1880
66 66 9.59e-01 -0.616000 0.9730 593 393 1850
67 67 9.60e-01 -0.618000 0.9750 584 386 1830
68 68 9.62e-01 -0.621000 0.9760 575 379 1810
69 69 9.62e-01 -0.624000 0.9750 566 371 1790
70 70 9.63e-01 -0.627000 0.9760 557 364 1760
71 71 9.63e-01 -0.629000 0.9750 549 358 1740
72 72 9.64e-01 -0.632000 0.9750 541 351 1720
73 73 9.64e-01 -0.634000 0.9750 533 345 1700
74 74 9.64e-01 -0.637000 0.9750 525 338 1680
75 75 9.64e-01 -0.640000 0.9740 517 333 1670
76 76 9.64e-01 -0.643000 0.9740 510 327 1650
77 77 9.65e-01 -0.646000 0.9740 503 321 1630
78 78 9.65e-01 -0.648000 0.9720 496 316 1610
79 79 9.65e-01 -0.651000 0.9730 489 310 1590
80 80 9.65e-01 -0.653000 0.9730 482 304 1580
81 81 9.65e-01 -0.655000 0.9730 476 299 1560
82 82 9.67e-01 -0.658000 0.9740 470 294 1540
83 83 9.67e-01 -0.659000 0.9750 464 289 1530
84 84 9.67e-01 -0.661000 0.9740 457 284 1510
85 85 9.67e-01 -0.663000 0.9740 452 280 1500
86 86 9.67e-01 -0.665000 0.9740 446 276 1480
87 87 9.67e-01 -0.668000 0.9730 440 271 1470
88 88 9.67e-01 -0.670000 0.9720 435 267 1450
89 89 9.67e-01 -0.672000 0.9710 429 263 1440
90 90 9.67e-01 -0.674000 0.9710 424 260 1420
91 91 9.67e-01 -0.676000 0.9700 419 256 1410
92 92 9.68e-01 -0.678000 0.9700 414 252 1400
93 93 9.69e-01 -0.680000 0.9710 409 248 1380
94 94 9.70e-01 -0.682000 0.9720 404 244 1370
95 95 9.69e-01 -0.685000 0.9700 399 241 1360
96 96 9.69e-01 -0.688000 0.9690 395 238 1350
97 97 9.68e-01 -0.689000 0.9690 390 235 1340
98 98 9.69e-01 -0.692000 0.9690 386 232 1320
99 99 9.68e-01 -0.694000 0.9670 381 228 1310
100 100 9.67e-01 -0.695000 0.9660 377 225 1300
options(repr.plot.width=24, repr.plot.height=6)
par(mfrow = c(1,2));
cex1 = 0.9;
plot(sft$fitIndices[, 1],
-sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
xlab = "Soft Threshold (power)",
ylab = "Scale Free Topology Model Fit, signed R^2",
main = paste("Scale independence")
)
text(sft$fitIndices[, 1],
-sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
labels = powers, cex = cex1, col = "red"
)
abline(h = 0.90, col = "red")
plot(sft$fitIndices[, 1],
sft$fitIndices[, 5],
xlab = "Soft Threshold (power)",
ylab = "Mean Connectivity",
type = "n",
main = paste("Mean connectivity")
)
text(sft$fitIndices[, 1],
sft$fitIndices[, 5],
labels = powers,
cex = cex1, col = "red")
picked_power = 30
temp_cor <- cor
cor <- WGCNA::cor # Force it to use WGCNA cor function (fix a namespace conflict issue)
netwk <- blockwiseModules(y.list, # <= input here
# == Adjacency Function ==
power = picked_power, # <= power here
networkType = "signed",
# == Tree and Block Options ==
deepSplit = 2,
pamRespectsDendro = F,
# detectCutHeight = 0.75,
minModuleSize = 200,
maxBlockSize = 4000,
# == Module Adjustments ==
reassignThreshold = 0,
mergeCutHeight = 0.25,
# == TOM == Archive the run results in TOM file (saves time)
saveTOMs = T,
saveTOMFileBase = "ER",
# == Output Options
numericLabels = T,
verbose = 3)
Calculating module eigengenes block-wise from all genes
Flagging genes and samples with too many missing values...
..step 1
....pre-clustering genes to determine blocks..
Projective K-means:
..k-means clustering..
..merging smaller clusters...
Block sizes:
gBlocks
1 2 3 4 5 6
3874 3289 3234 2974 2659 1356
..Working on block 1 .
TOM calculation: adjacency..
..will use 94 parallel threads.
Fraction of slow calculations: 0.000000
..connectivity..
..matrix multiplication (system BLAS)..
..normalization..
..done.
..saving TOM for block 1 into file ER-block.1.RData
....clustering..
....detecting modules..
....calculating module eigengenes..
....checking kME in modules..
..removing 1 genes from module 1 because their KME is too low.
..removing 10 genes from module 2 because their KME is too low.
..Working on block 2 .
TOM calculation: adjacency..
..will use 94 parallel threads.
Fraction of slow calculations: 0.000000
..connectivity..
..matrix multiplication (system BLAS)..
..normalization..
..done.
..saving TOM for block 2 into file ER-block.2.RData
....clustering..
....detecting modules..
....calculating module eigengenes..
....checking kME in modules..
..Working on block 3 .
TOM calculation: adjacency..
..will use 94 parallel threads.
Fraction of slow calculations: 0.000000
..connectivity..
..matrix multiplication (system BLAS)..
..normalization..
..done.
..saving TOM for block 3 into file ER-block.3.RData
....clustering..
....detecting modules..
....calculating module eigengenes..
....checking kME in modules..
..Working on block 4 .
TOM calculation: adjacency..
..will use 94 parallel threads.
Fraction of slow calculations: 0.000000
..connectivity..
..matrix multiplication (system BLAS)..
..normalization..
..done.
..saving TOM for block 4 into file ER-block.4.RData
....clustering..
....detecting modules..
....calculating module eigengenes..
....checking kME in modules..
..Working on block 5 .
TOM calculation: adjacency..
..will use 94 parallel threads.
Fraction of slow calculations: 0.000000
..connectivity..
..matrix multiplication (system BLAS)..
..normalization..
..done.
..saving TOM for block 5 into file ER-block.5.RData
....clustering..
....detecting modules..
....calculating module eigengenes..
....checking kME in modules..
..Working on block 6 .
TOM calculation: adjacency..
..will use 94 parallel threads.
Fraction of slow calculations: 0.000000
..connectivity..
..matrix multiplication (system BLAS)..
..normalization..
..done.
..saving TOM for block 6 into file ER-block.6.RData
....clustering..
....detecting modules..
....calculating module eigengenes..
....checking kME in modules..
..removing 2 genes from module 3 because their KME is too low.
..merging modules that are too close..
mergeCloseModules: Merging modules whose distance is less than 0.25
Calculating new MEs...
options(repr.plot.width=24, repr.plot.height=6)
# Convert labels to colors for plotting
mergedColors = labels2colors(netwk$colors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(
netwk$dendrograms[[1]],
mergedColors[netwk$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE,
hang = 0.03,
addGuide = TRUE,
guideHang = 0.05 )
head(netwk$colors[netwk$blockGenes[[1]]])
table(netwk$colors)
0 1 2 3 4 5 6 7 8 9 10 13 6758 6649 818 715 687 503 384 304 291 264
module_df <- data.frame(
gene_id = names(netwk$colors),
colors = labels2colors(netwk$colors)
)
module_df[1:5,]
| gene_id | colors | |
|---|---|---|
| <chr> | <chr> | |
| 1 | AT1G01010 | turquoise |
| 2 | AT1G01020 | red |
| 3 | AT1G01030 | turquoise |
| 4 | AT1G01040 | turquoise |
| 5 | AT1G01050 | blue |
table(module_df$colors)
black blue brown green grey magenta pink purple
384 6649 818 687 13 291 304 264
red turquoise yellow
503 6758 715
options(repr.plot.width=8, repr.plot.height=8)
# Get Module Eigengenes per cluster
MEs0 <- moduleEigengenes(y.list, mergedColors)$eigengenes
# Reorder modules so similar modules are next to each other
MEs0 <- orderMEs(MEs0)
module_order = names(MEs0) %>% gsub("ME","", .)
# Add treatment names
MEs0$treatment = row.names(MEs0)
# tidy & plot data
mME = MEs0 %>%
pivot_longer(-treatment) %>%
mutate(
name = gsub("ME", "", name),
name = factor(name, levels = module_order)
)
#mME <- mME[-which(mME$name == "grey"),]
mME$treatment <- factor(mME$treatment, levels = seq(1,200))
mME$tricycle_position <- mME$treatment
#mME$name <- factor(mME$name, levels = rev(c("green", "grey", "black", "blue", "brown", "turquoise", "yellow", "red", "pink", "magenta")))
mME$name <- factor(mME$name, levels = rev(c("brown", "red", "black", "green", "turquoise", "yellow", "blue", "magenta", "pink","purple","grey")))
mME$module <- as.character(mME$name)
#mME$module[which(mME$module=="green")]="G1-M1"
#mME$module[which(mME$module=="grey")]="G1-M2"
#mME$module[which(mME$module=="blue")]="G1-M3"
#mME$module[which(mME$module=="black")]="G1-M4"
#mME$module[which(mME$module=="brown")]="G1S-M5"
#mME$module[which(mME$module=="turquoise")]="G1S-M6"
#mME$module[which(mME$module=="yellow")]="S-M7"
#mME$module[which(mME$module=="magenta")]="G2M-M8"
#mME$module[which(mME$module=="red")]="G2MG1-M9"
#mME$module[which(mME$module=="pink")]="G1S-M10"
#mME$module[which(mME$module=="grey")]="Unassigned"
#mME$module <- factor(mME$module, levels = rev(c("G1-M1", "G1-M2", "G1-M3", "G1-M4", "G1S-M5", "G1S-M6", "S-M7", "G2M-M8", "G2MG1-M9", "G1S-M10", "Unassigned")))
mME %>% ggplot(., aes(x=tricycle_position, y=name, fill=value)) +
geom_tile() +
theme_bw() +
scale_fill_gradient2(
low = "blue",
high = "red",
mid = "white",
midpoint = 0,
# limit = c(-1,1)
) +
theme(axis.text.x = element_text(angle=90)) +
labs(title = "Module-trait Relationships", y = "Modules", fill="corr")
## Prepare gene module list
module_sel <- select(module_df, gene_id, colors)
module_list <- split(module_sel, f=module_sel$colors)
#this makes list from long df of gene lists - TARGET is what we want to keep
module_list <- lapply(module_list, function(x) x[names(x)=="gene_id"])
# convert each sublist into character and eliminate duplicates
module_list <- lapply(module_list, function(x) as.character(unique(x$gene_id)))
cluster_GO <- gost(module_list, organism = "athaliana", correction_method = "fdr", significant = F, multi_query = F)
cluster_GO_df <- cluster_GO[[1]]
cluster_GO_sig <- filter(cluster_GO_df, p_value<=0.01)
# top terms for each cluster
cluster_GO_sig %>%
filter(source=="GO:BP", intersection_size>=4) %>%
group_by(query) %>%
top_n(5, wt = -p_value) %>%
arrange(desc(p_value)) -> top_GO
GO_n <- cluster_GO_sig %>%
filter(source=="GO:BP", intersection_size>=4) %>%
group_by(term_id) %>%
tally() %>%
arrange(desc(n))
GO_n <- dplyr::rename(GO_n, "n_clusters"=n)
cluster_GO_sig_n <- left_join(cluster_GO_sig, GO_n)
# get all terms for the top ones so that all clusters have values
top_GO_all <- filter(cluster_GO_df, term_id %in% top_GO$term_id)
#spread and plot
top_GO_sel <- select(top_GO_all, query, p_value, term_id, term_name)
spread_GO <- spread(top_GO_sel, key = query, p_value)
spread_GO[is.na(spread_GO)] <- 1
spread_GO_m <- as.matrix(-log10(spread_GO[3:ncol(spread_GO)]))
rownames(spread_GO_m) <- spread_GO$term_name
Joining with `by = join_by(term_id)`
options(repr.plot.width = 16, repr.plot.height = 10)
GO_hm <- Heatmap(spread_GO_m,
name = "-log10_pval",
heatmap_legend_param = list(title_position="topcenter", color_bar = "continuous"),
col = colorRamp2(c(0, 10),
c("beige", "#e31a1c")),
cluster_rows = T,
cluster_columns = T,
use_raster= FALSE,
show_column_names = TRUE,
show_row_names = TRUE,
show_row_dend = TRUE,
show_column_dend = TRUE,
clustering_distance_rows = "pearson",
clustering_distance_columns = "pearson",
row_names_gp = gpar(fontsize = 12))
# padding - bottom, left, top, right
draw(GO_hm, padding = unit(c(15, 15, 5, 80), "mm"), heatmap_legend_side = "left")
options(repr.plot.width=8, repr.plot.height=8)
# Get Module Eigengenes per cluster
MEs0 <- moduleEigengenes(y.list, mergedColors)$eigengenes
# Reorder modules so similar modules are next to each other
MEs0 <- orderMEs(MEs0)
module_order = names(MEs0) %>% gsub("ME","", .)
# Add treatment names
MEs0$treatment = row.names(MEs0)
# tidy & plot data
mME = MEs0 %>%
pivot_longer(-treatment) %>%
mutate(
name = gsub("ME", "", name),
name = factor(name, levels = module_order)
)
mME <- mME[-which(mME$name == "grey"),]
mME$treatment <- factor(mME$treatment, levels = seq(1,200))
mME$tricycle_position <- mME$treatment
#mME$name <- factor(mME$name, levels = rev(c("green", "grey", "black", "blue", "brown", "turquoise", "yellow", "red", "pink", "magenta")))
mME$name <- factor(mME$name, levels = rev(c("magenta", "blue", "black", "red", "green","purple", "turquoise", "pink", "yellow", "brown")))
mME$module <- as.character(mME$name)
mME$module[which(mME$module=="magenta")]="G1-M1"
mME$module[which(mME$module=="blue")]="G1-M2"
mME$module[which(mME$module=="black")]="G1S-M3"
mME$module[which(mME$module=="red")]="G1S-M4"
mME$module[which(mME$module=="green")]="G1S-M5"
mME$module[which(mME$module=="purple")]="G1S-M6"
mME$module[which(mME$module=="turquoise")]="S-M7"
mME$module[which(mME$module=="pink")]="S-M8"
mME$module[which(mME$module=="yellow")]="G2MG1-M9"
mME$module[which(mME$module=="brown")]="G2MG1-M10"
mME$module <- factor(mME$module, levels = rev(c("G1-M1", "G1-M2", "G1S-M3", "G1S-M4", "G1S-M5", "G1S-M6", "S-M7", "S-M8", "G2MG1-M9", "G2MG1-M10")))
mME %>% ggplot(., aes(x=tricycle_position, y=module, fill=value)) +
geom_tile() +
theme_bw() +
scale_fill_gradient2(
low = "blue",
high = "red",
mid = "white",
midpoint = 0,
# limit = c(-1,1)
) +
theme(axis.text.x = element_text(angle=90)) +
labs(title = "Module-trait Relationships", y = "Modules", fill="corr")
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_Gene_Module_Plot_CC.pdf",width=8, height=8)
print(mME %>% ggplot(., aes(x=tricycle_position, y=module, fill=value)) +
geom_tile() +
theme_bw() +
scale_fill_gradient2(
low = "blue",
high = "red",
mid = "white",
midpoint = 0,
# limit = c(-1,1)
) +
theme(axis.text.x = element_text(angle=90)) +
labs(title = "Module-trait Relationships", y = "Modules", fill="corr"))
dev.off()
head(MEs0)
| MEgreen | MEred | MEblack | MEblue | MEpurple | MEturquoise | MEmagenta | MEpink | MEbrown | MEyellow | MEgrey | treatment | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <chr> | |
| 1 | -0.06292724 | -0.11280193 | -0.09476705 | -0.0070958259 | 0.03907418 | -0.03679146 | 0.006469531 | 0.01471272 | 0.10465104 | 0.08181743 | 0.05303403 | 1 |
| 2 | -0.06319146 | -0.11042494 | -0.09141089 | -0.0038713676 | 0.03500705 | -0.03881049 | 0.008594824 | 0.01457642 | 0.10378887 | 0.07897368 | 0.05530398 | 2 |
| 3 | -0.06343191 | -0.10791123 | -0.08787816 | -0.0005945044 | 0.03082476 | -0.04081087 | 0.010797454 | 0.01448043 | 0.10282742 | 0.07603997 | 0.05741647 | 3 |
| 4 | -0.06364731 | -0.10526671 | -0.08417839 | 0.0027297818 | 0.02653601 | -0.04279118 | 0.013071424 | 0.01442213 | 0.10176978 | 0.07302121 | 0.05937155 | 4 |
| 5 | -0.06383638 | -0.10249730 | -0.08032112 | 0.0060965092 | 0.02214945 | -0.04475002 | 0.015410743 | 0.01439888 | 0.10061909 | 0.06992227 | 0.06116928 | 5 |
| 6 | -0.06399781 | -0.09960891 | -0.07631589 | 0.0095006959 | 0.01767377 | -0.04668598 | 0.017809415 | 0.01440806 | 0.09937845 | 0.06674806 | 0.06280971 | 6 |
module_df$colors <- factor(module_df$colors, levels=c("magenta", "blue", "black", "red", "green","purple", "turquoise", "pink", "yellow", "brown"))
table(module_df$colors)
magenta blue black red green purple turquoise pink
291 6649 384 503 687 264 6758 304
yellow brown
715 818
module_df$module <- as.character(module_df$colors)
module_df$module[which(module_df$module=="magenta")]="G1-M1"
module_df$module[which(module_df$module=="blue")]="G1-M2"
module_df$module[which(module_df$module=="black")]="G1S-M3"
module_df$module[which(module_df$module=="red")]="G1S-M4"
module_df$module[which(module_df$module=="green")]="G1S-M5"
module_df$module[which(module_df$module=="purple")]="G1S-M6"
module_df$module[which(module_df$module=="turquoise")]="S-M7"
module_df$module[which(module_df$module=="pink")]="S-M8"
module_df$module[which(module_df$module=="yellow")]="G2MG1-M9"
module_df$module[which(module_df$module=="brown")]="G2MG1-M10"
module_df$module <- factor(module_df$module, levels = c("G1-M1", "G1-M2", "G1S-M3", "G1S-M4", "G1S-M5", "G1S-M6", "S-M7", "S-M8", "G2MG1-M9", "G2MG1-M10"))
table(module_df$module)
G1-M1 G1-M2 G1S-M3 G1S-M4 G1S-M5 G1S-M6 S-M7 S-M8
291 6649 384 503 687 264 6758 304
G2MG1-M9 G2MG1-M10
715 818
module_df$BES_up <- rep("No",nrow(module_df))
module_df$BES_up[match(GL2up, module_df$gene_id)[!is.na(match(GL2up, module_df$gene_id))]]="Yes"
module_df$BES_down <- rep("No",nrow(module_df))
module_df$BES_down[match(GL2down, module_df$gene_id)[!is.na(match(GL2down, module_df$gene_id))]]="Yes"
module_df$MYB3R4 <- rep("No",nrow(module_df))
module_df$MYB3R4[match(GL3, module_df$gene_id)[!is.na(match(GL3, module_df$gene_id))]]="Yes"
## BES_up
round((table(module_df$module,module_df$BES_up)/as.numeric(table(module_df$module)))[,2]/sum((table(module_df$module,module_df$BES_up)/as.numeric(table(module_df$module)))[,2]),2)
table(module_df$module,module_df$BES_up)
No Yes
G1-M1 286 5
G1-M2 6230 419
G1S-M3 375 9
G1S-M4 486 17
G1S-M5 661 26
G1S-M6 259 5
S-M7 6631 127
S-M8 293 11
G2MG1-M9 692 23
G2MG1-M10 772 46
## BES_down
round((table(module_df$module,module_df$BES_down)/as.numeric(table(module_df$module)))[,2]/sum((table(module_df$module,module_df$BES_down)/as.numeric(table(module_df$module)))[,2]),2)
table(module_df$module,module_df$BES_down)
No Yes
G1-M1 274 17
G1-M2 6323 326
G1S-M3 366 18
G1S-M4 472 31
G1S-M5 650 37
G1S-M6 257 7
S-M7 6527 231
S-M8 289 15
G2MG1-M9 685 30
G2MG1-M10 785 33
## MYB3R4
round((table(module_df$module,module_df$MYB3R4)/as.numeric(table(module_df$module)))[,2]/sum((table(module_df$module,module_df$MYB3R4)/as.numeric(table(module_df$module)))[,2]),2)
table(module_df$module,module_df$MYB3R4)
No Yes
G1-M1 289 2
G1-M2 6616 33
G1S-M3 384 0
G1S-M4 502 1
G1S-M5 687 0
G1S-M6 264 0
S-M7 6734 24
S-M8 303 1
G2MG1-M9 700 15
G2MG1-M10 741 77
write.csv(module_df, "./tradeseq/All_genes_Phloem_Atlas_tricycle_module.csv")
# pick out a few modules of interest here
modules_of_interest = c("G1-M1", "G1-M2", "G1S-M3", "G1S-M4", "G1S-M5", "G1S-M6", "S-M7", "S-M8", "G2MG1-M9", "G2MG1-M10")
# Pull out list of genes in that module
submod = module_df %>%
subset(module %in% modules_of_interest)
row.names(module_df) = module_df$gene_id
# Get normalized expression for those genes
expr_normalized <- t(apply(y.list,2, function(x){(x-min(x))/(max(x)-min(x))}))
subexpr = expr_normalized[submod$gene_id,]
submod_df = data.frame(subexpr) %>%
mutate(
gene_id = row.names(.)
) %>%
pivot_longer(-gene_id) %>%
mutate(
module = module_df[gene_id,]$module
)
submod_df$name <- factor(submod_df$name, levels = paste0("X",seq(1,200)))
submod_df$module <- factor(submod_df$module, levels =c("G1-M1", "G1-M2", "G1S-M3", "G1S-M4", "G1S-M5", "G1S-M6", "S-M7", "S-M8", "G2MG1-M9", "G2MG1-M10"))
submod_df %>% ggplot(., aes(x=name, y=value, group=gene_id)) +
geom_line(aes(color = module),
alpha = 0.2) +
theme_bw() +
theme(
axis.text.x = element_text(angle = 90)
) +
facet_grid(rows = vars(module)) +
labs(x = "tricycle position",
y = "scaled expression")
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_Gene_Module_Exp_Plot_CC.pdf",width=8, height=8)
print(submod_df %>% ggplot(., aes(x=name, y=value, group=gene_id)) +
geom_line(aes(color = module),
alpha = 0.2) +
theme_bw() +
theme(
axis.text.x = element_text(angle = 90)
) +
facet_grid(rows = vars(module)) +
labs(x = "tricycle position",
y = "scaled expression"))
dev.off()
ME <- moduleEigengenes(y.list, mergedColors)$eigengenes
head(ME)
| MEblack | MEblue | MEbrown | MEgreen | MEgrey | MEmagenta | MEpink | MEpurple | MEred | MEturquoise | MEyellow | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
| 1 | -0.09476705 | -0.0070958259 | 0.10465104 | -0.06292724 | 0.05303403 | 0.006469531 | 0.01471272 | 0.03907418 | -0.11280193 | -0.03679146 | 0.08181743 |
| 2 | -0.09141089 | -0.0038713676 | 0.10378887 | -0.06319146 | 0.05530398 | 0.008594824 | 0.01457642 | 0.03500705 | -0.11042494 | -0.03881049 | 0.07897368 |
| 3 | -0.08787816 | -0.0005945044 | 0.10282742 | -0.06343191 | 0.05741647 | 0.010797454 | 0.01448043 | 0.03082476 | -0.10791123 | -0.04081087 | 0.07603997 |
| 4 | -0.08417839 | 0.0027297818 | 0.10176978 | -0.06364731 | 0.05937155 | 0.013071424 | 0.01442213 | 0.02653601 | -0.10526671 | -0.04279118 | 0.07302121 |
| 5 | -0.08032112 | 0.0060965092 | 0.10061909 | -0.06383638 | 0.06116928 | 0.015410743 | 0.01439888 | 0.02214945 | -0.10249730 | -0.04475002 | 0.06992227 |
| 6 | -0.07631589 | 0.0095006959 | 0.09937845 | -0.06399781 | 0.06280971 | 0.017809415 | 0.01440806 | 0.01767377 | -0.09960891 | -0.04668598 | 0.06674806 |
ME <- ME[,c("MEmagenta", "MEblue", "MEblack", "MEred","MEgreen", "MEpurple", "MEturquoise", "MEpink", "MEyellow", "MEbrown")]
colnames(ME)[which(colnames(ME)=="magenta")]="G1-M1"
colnames(ME)[which(colnames(ME)=="blue")]="G1-M2"
colnames(ME)[which(colnames(ME)=="black")]="G1S-M3"
colnames(ME)[which(colnames(ME)=="red")]="G1S-M4"
colnames(ME)[which(colnames(ME)=="green")]="G1S-M5"
colnames(ME)[which(colnames(ME)=="purple")]="G1S-M6"
colnames(ME)[which(colnames(ME)=="turquoise")]="S-M7"
colnames(ME)[which(colnames(ME)=="pink")]="S-M8"
colnames(ME)[which(colnames(ME)=="yellow")]="G2MG1-M9"
colnames(ME)[which(colnames(ME)=="brown")]="G2MG1-M10"
head(ME)
| MEmagenta | MEblue | MEblack | MEred | MEgreen | MEpurple | MEturquoise | MEpink | MEyellow | MEbrown | |
|---|---|---|---|---|---|---|---|---|---|---|
| <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
| 1 | 0.006469531 | -0.0070958259 | -0.09476705 | -0.11280193 | -0.06292724 | 0.03907418 | -0.03679146 | 0.01471272 | 0.08181743 | 0.10465104 |
| 2 | 0.008594824 | -0.0038713676 | -0.09141089 | -0.11042494 | -0.06319146 | 0.03500705 | -0.03881049 | 0.01457642 | 0.07897368 | 0.10378887 |
| 3 | 0.010797454 | -0.0005945044 | -0.08787816 | -0.10791123 | -0.06343191 | 0.03082476 | -0.04081087 | 0.01448043 | 0.07603997 | 0.10282742 |
| 4 | 0.013071424 | 0.0027297818 | -0.08417839 | -0.10526671 | -0.06364731 | 0.02653601 | -0.04279118 | 0.01442213 | 0.07302121 | 0.10176978 |
| 5 | 0.015410743 | 0.0060965092 | -0.08032112 | -0.10249730 | -0.06383638 | 0.02214945 | -0.04475002 | 0.01439888 | 0.06992227 | 0.10061909 |
| 6 | 0.017809415 | 0.0095006959 | -0.07631589 | -0.09960891 | -0.06399781 | 0.01767377 | -0.04668598 | 0.01440806 | 0.06674806 | 0.09937845 |
datKME = signedKME(y.list[,module_df$gene_id[which(module_df$module != "Unassigned")]], ME, outputColumnName = "kME")
colnames(datKME) <- paste0("kME_",c("G1-M1", "G1-M2", "G1S-M3", "G1S-M4", "G1S-M5", "G1S-M6", "S-M7", "S-M8", "G2MG1-M9", "G2MG1-M10"))
head(datKME)
| kME_G1-M1 | kME_G1-M2 | kME_G1S-M3 | kME_G1S-M4 | kME_G1S-M5 | kME_G1S-M6 | kME_S-M7 | kME_S-M8 | kME_G2MG1-M9 | kME_G2MG1-M10 | |
|---|---|---|---|---|---|---|---|---|---|---|
| <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
| AT1G01010 | -0.47635424 | -0.82896438 | 0.12114626 | 0.36334674 | 0.3858923 | 0.54098776 | 0.9592199 | 0.09904726 | 0.0367631 | -0.7066345 |
| AT1G01020 | -0.46743291 | 0.03862948 | 0.45900966 | 0.96528162 | 0.8442728 | -0.08192871 | 0.2245726 | -0.54946464 | -0.8431030 | -0.9562942 |
| AT1G01030 | -0.00883007 | -0.84503027 | -0.37749861 | -0.63440726 | -0.4577792 | 0.63722195 | 0.6936058 | 0.63137073 | 0.8968953 | 0.2902137 |
| AT1G01040 | -0.13993421 | -0.19381788 | 0.71774431 | 0.85715816 | 0.4612264 | -0.16769257 | 0.5411004 | -0.02071880 | -0.5418415 | -0.9031343 |
| AT1G01050 | 0.38247055 | 0.95869905 | 0.07054638 | -0.02776636 | -0.1258440 | -0.66810288 | -0.9936319 | -0.30285984 | -0.3706164 | 0.4249815 |
| AT1G01060 | -0.37395407 | -0.66413160 | 0.34985813 | 0.55427039 | 0.4150461 | 0.31508079 | 0.8824949 | 0.08371773 | -0.1560908 | -0.8163872 |
write.csv(left_join(rownames_to_column(datKME), rownames_to_column(module_df), by=c("rowname")), "./tradeseq/All_genes_Phloem_Atlas_tricycle_module_membership.csv")
genes_of_interest = module_df %>%
subset(module %in% modules_of_interest[-11])
expr_of_interest = t(y.list)[genes_of_interest$gene_id,]
expr_of_interest[1:5,1:5]
#> B-3 B-4 B-5 L-3 L-4
#> AC186512.3_FG001 6.901539 7.389644 6.975945 6.859593 7.370816
#> AC186512.3_FG007 7.919688 7.754506 7.670946 7.417760 7.988427
#> AC190623.3_FG001 6.575155 7.170788 7.438024 8.223261 8.008850
#> AC196475.3_FG004 6.054319 6.439899 6.424540 5.815344 6.565299
#> AC196475.3_FG005 6.194406 5.872273 6.207174 6.499828 6.314952
# Only recalculate TOM for modules of interest (faster, altho there's some online discussion if this will be slightly off)
TOM = TOMsimilarityFromExpr(t(expr_of_interest),
power = picked_power)
#> TOM calculation: adjacency..
#> ..will use 4 parallel threads.
#> Fraction of slow calculations: 0.000000
#> ..connectivity..
#> ..matrix multiplication (system BLAS)..
#> ..normalization..
#> ..done.
# Add gene names to row and columns
row.names(TOM) = row.names(expr_of_interest)
colnames(TOM) = row.names(expr_of_interest)
| AT1G01010 | 0.03437418 | 0.03445125 | 0.03452833 | 0.03460526 | 0.03468185 |
|---|---|---|---|---|---|
| AT1G01020 | 0.16125047 | 0.15936069 | 0.15746179 | 0.15555653 | 0.15364767 |
| AT1G01030 | 0.01163936 | 0.01141413 | 0.01118563 | 0.01095425 | 0.01072038 |
| AT1G01040 | 0.02817155 | 0.02762631 | 0.02707317 | 0.02651315 | 0.02594726 |
| AT1G01050 | 1.13815937 | 1.14230433 | 1.14652946 | 1.15082737 | 1.15519066 |
TOM calculation: adjacency.. ..will use 94 parallel threads. Fraction of slow calculations: 0.000000 ..connectivity.. ..matrix multiplication (system BLAS).. ..normalization.. ..done.
edge_list = data.frame(TOM) %>%
mutate(
gene1 = row.names(.)
) %>%
pivot_longer(-gene1) %>%
dplyr::rename(gene2 = name, correlation = value) %>%
unique() %>%
subset(!(gene1==gene2)) %>%
mutate(
module1 = module_df[gene1,]$module,
module2 = module_df[gene2,]$module
)
head(edge_list)
#> # A tibble: 6 x 5
#> gene1 gene2 correlation module1 module2
#> <chr> <chr> <dbl> <chr> <chr>
#> 1 AC186512.3_FG001 AC186512.3_FG007 0.0238 turquoise turquoise
#> 2 AC186512.3_FG001 AC190623.3_FG001 0.0719 turquoise turquoise
#> 3 AC186512.3_FG001 AC196475.3_FG004 0.143 turquoise turquoise
#> 4 AC186512.3_FG001 AC196475.3_FG005 0.0117 turquoise turquoise
#> 5 AC186512.3_FG001 AC196489.3_FG002 0.0181 turquoise turquoise
#> 6 AC186512.3_FG001 AC198481.3_FG004 0.0240 turquoise turquoise
# Export Network file to be read into Cytoscape, VisANT, etc
write_delim(edge_list,
file = "./tradeseq/Module_network_edgelist.tsv",
delim = "\t")
| gene1 | gene2 | correlation | module1 | module2 |
|---|---|---|---|---|
| <chr> | <chr> | <dbl> | <fct> | <fct> |
| AT1G01010 | AT1G01020 | 1.263899e-05 | G2MG1-M9 | S-M7 |
| AT1G01010 | AT1G01030 | 1.360577e-05 | G2MG1-M9 | S-M7 |
| AT1G01010 | AT1G01040 | 1.104410e-06 | G2MG1-M9 | S-M7 |
| AT1G01010 | AT1G01050 | 6.631369e-05 | G2MG1-M9 | G1-M2 |
| AT1G01010 | AT1G01060 | 7.753520e-04 | G2MG1-M9 | S-M7 |
| AT1G01010 | AT1G01070 | 3.555517e-08 | G2MG1-M9 | S-M7 |
## 30G too large
nrow(edge_list)
summary(edge_list$correlation)
Min. 1st Qu. Median Mean 3rd Qu. Max. 0.0000000 0.0000138 0.0056340 0.0982158 0.1385098 0.6644113
edge_list_cor04 <- edge_list %>% filter(correlation>=0.4)
## 2.6G acceptable
nrow(edge_list_cor04)
write_delim(edge_list_cor04,
file = "./tradeseq/Module_network_edgelist_cor04.tsv",
delim = "\t")
head(edge_list_cor04)
| gene1 | gene2 | correlation | module1 | module2 |
|---|---|---|---|---|
| <chr> | <chr> | <dbl> | <fct> | <fct> |
| AT1G01020 | AT1G01090 | 0.4742238 | S-M7 | S-M7 |
| AT1G01020 | AT1G01130 | 0.4721234 | S-M7 | S-M7 |
| AT1G01020 | AT1G01260 | 0.4529889 | S-M7 | S-M7 |
| AT1G01020 | AT1G01290 | 0.5408273 | S-M7 | S-M7 |
| AT1G01020 | AT1G01370 | 0.4995471 | S-M7 | S-M7 |
| AT1G01020 | AT1G01480 | 0.4947831 | S-M7 | S-M7 |
table(edge_list_cor04$module1)
G1-M1 G1-M2 G1-M3 G1-M4 G1S-M5 G1S-M6 S-M7
60171 8334861 179501 4033 1002022 49934 40704082
G2M-M8 G2MG1-M9 G1S-M10 Unassigned
2295271 11613 72 0
table(edge_list_cor04$module2)
G1-M1 G1-M2 G1-M3 G1-M4 G1S-M5 G1S-M6 S-M7
60171 8334861 179501 4033 1002022 49934 40704082
G2M-M8 G2MG1-M9 G1S-M10 Unassigned
2295271 11613 72 0
module_df <- read.csv("./tradeseq/All_genes_Phloem_Atlas_tricycle_module.csv", row.names=1)
head(module_df)
| gene_id | colors | module | BES_up | BES_down | MYB3R4 | |
|---|---|---|---|---|---|---|
| <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | |
| 1 | AT1G01010 | turquoise | S-M7 | No | No | No |
| 2 | AT1G01020 | red | G1S-M4 | No | No | No |
| 3 | AT1G01030 | turquoise | S-M7 | No | No | No |
| 4 | AT1G01040 | turquoise | S-M7 | No | No | No |
| 5 | AT1G01050 | blue | G1-M2 | No | No | No |
| 6 | AT1G01060 | turquoise | S-M7 | No | No | No |
## Prepare gene module list
module_sel <- select(module_df, gene_id, module)
module_list <- split(module_sel, f=module_sel$module)
#this makes list from long df of gene lists - TARGET is what we want to keep
module_list <- lapply(module_list, function(x) x[names(x)=="gene_id"])
# convert each sublist into character and eliminate duplicates
module_list <- lapply(module_list, function(x) as.character(unique(x$gene_id)))
module_list <- module_list[c("G1-M1", "G1-M2", "G1S-M3", "G1S-M4", "G1S-M5", "G1S-M6", "S-M7", "S-M8", "G2MG1-M9", "G2MG1-M10")]
## Prepare gene lists to intersect with module
BES_up <- module_df$gene_id[which(module_df$BES_up=="Yes")]
BES_down <- module_df$gene_id[which(module_df$BES_down=="Yes")]
MYB3R4 <- module_df$gene_id[which(module_df$MYB3R4=="Yes")]
g_list <- list("BES_up"=BES_up, "BES_down"=BES_down, "MYB3R4"=MYB3R4)
nrow(module_df)
## GeneOverlap
# number of integrated features
genome_size <- 17386L
#compare all lists
gom.self <- newGOM(module_list, g_list, genome.size=genome_size)
options(repr.plot.width = 10, repr.plot.height = 10)
int <- getNestedList(gom.self, "intersection")
int_matrix <- getMatrix(gom.self, "intersection")
p.val <- getMatrix(gom.self, "pval")
JC <- getMatrix(gom.self, "Jaccard")
# log of p.val for intersection
p.val_log <- -log10(p.val + 1e-200)
olap <- Heatmap(p.val_log,
name = "-log10_pval",
col = colorRamp2(c(0, 10),
c("beige", "red")),
column_title = "Number of shared genes",
cluster_rows = F,
cluster_columns = F,
use_raster= FALSE,
show_column_names = TRUE,
show_row_names = TRUE,
show_row_dend = TRUE,
heatmap_legend_param = list(title_gp = grid::gpar(fontsize = 12)),
column_title_gp = grid::gpar(fontsize = 18),
column_names_gp = grid::gpar(fontsize = 18),
row_names_gp = grid::gpar(fontsize = 18),
clustering_distance_rows = "pearson",
clustering_distance_columns = "pearson",
show_column_dend = TRUE, cell_fun = function(j, i, x, y, width, height, fill) {grid.text(sprintf("%.0f", int_matrix[i, j]), x, y, gp = gpar(fontsize = 18))
})
# padding - bottom, left, top, right
draw(olap, padding = unit(c(15, 5, 5, 10), "mm"), heatmap_legend_side = "left")
summary(as.numeric(as.matrix(p.val_log)))
Min. 1st Qu. Median Mean 3rd Qu. Max. 0.00000 0.00416 0.12161 3.57974 0.80739 60.30735
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_Gene_Module_Intersection_With_GeneList.pdf",width=10, height=10)
draw(olap, padding = unit(c(15, 5, 5, 10), "mm"), heatmap_legend_side = "left")
dev.off()
bsn <- read.csv("./tradeseq/BR_bsn_genes.csv")
head(bsn)
| Name | AGI | |
|---|---|---|
| <chr> | <chr> | |
| 1 | DWF4 | AT3G50660 |
| 2 | CPD | AT5G05690 |
| 3 | DET2 | AT2G38050 |
| 4 | ROT3 | AT4G36380 |
| 5 | CYP90D1 | AT3G13730 |
| 6 | BR6OX1 | AT5G38970 |
length(module_df$gene_id)
s1 <- intersect(module_list$`G1-M2`, g_list$BES_up)
#s1 <- g_list$BES_up
length(s1)
#s2 <- intersect(module_list$`G1-M2`, g_list$BES_down)
#s2 <- g_list$BES_down
s2 <- intersect(module_df$gene_id, bsn$AGI)
length(s2)
s1
s2
save(s1,s2,file='./tradeseq/Phloem_Atlas_upper_lower_cells_gene_lists.rds')
ulgl <- list("Upper_cells"=s1, "Lower_cells"=s2)
# Find the maximum length
max_length <- max(lengths(ulgl))
# Fill with NA to make all equal length
ulgl_df <- data.frame(lapply(ulgl, function(x) c(x, rep(NA, max_length - length(x)))))
write.csv(ulgl_df, "./tradeseq/Phloem_Atlas_upper_lower_cells_gene_lists.csv")
rc.sub <- subset(rc.integrated, cells= colnames(rc.integrated)[which(rc.integrated$tricycleCCStage == "G1/G0")])
#rc.sub <- subset(rc.integrated, cells= colnames(rc.integrated)[which(rc.integrated$tricycleCCStage == "G1/G0" | rc.integrated$tricycleCCStage == "S")])
## 1177 G1 cells out of 410 cells
length(colnames(rc.integrated)[which(rc.integrated$tricycleCCStage == "G1/G0")])
## 1339 S cells
length(colnames(rc.integrated)[which(rc.integrated$tricycleCCStage == "S")])
## 1129 G2M cells
length(colnames(rc.integrated)[which(rc.integrated$tricycleCCStage == "G2M")])
zscore <- function(x){(x-mean(x))/sd(x)}
s1z <- as.numeric(apply(apply(as.matrix(rc.sub@assays$RNA@data)[s1,], 1, zscore), 1, function(x){mean(x,na.rm = TRUE)}))
s2z <- as.numeric(apply(apply(as.matrix(rc.sub@assays$RNA@data)[s2,], 1, zscore), 1, function(x){mean(x,na.rm = TRUE)}))
summary(s1z)
summary(s2z)
Min. 1st Qu. Median Mean 3rd Qu. Max. -0.40912 -0.25354 -0.04296 0.00000 0.16660 0.96865
Min. 1st Qu. Median Mean 3rd Qu. Max. -0.3550 -0.3550 -0.2367 0.0000 0.1256 9.0612
dat <- data.frame(cell_id=colnames(rc.sub), tricyclePosition=rc.sub$tricyclePosition, Signature1=s1z, Signature2=s2z)
dat <- dat %>% mutate(diff = abs(Signature1-Signature2))
range01 <- function(x){(x-min(x))/(max(x)-min(x))}
dat$Signature1_scaled <- range01(dat$Signature1)
dat$Signature2_scaled <- range01(dat$Signature2)
dat$ratio <- dat$Signature1_scaled/dat$Signature2_scaled
head(dat)
| cell_id | tricyclePosition | Signature1 | Signature2 | diff | Signature1_scaled | Signature2_scaled | ratio | |
|---|---|---|---|---|---|---|---|---|
| <chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
| APL_AAACCCACAGTTGAAA-1 | APL_AAACCCACAGTTGAAA-1 | 0.5938682 | 0.36621714 | -0.35503442 | 0.721251552 | 0.5627465 | 0.00000000 | Inf |
| APL_AAACCCATCTGCGAGC-1 | APL_AAACCCATCTGCGAGC-1 | 0.6420312 | 0.28601250 | 0.29078681 | 0.004774306 | 0.5045331 | 0.06858613 | 7.356197 |
| APL_AAAGAACTCGAGAAGC-1 | APL_AAAGAACTCGAGAAGC-1 | 0.6490424 | 0.13330378 | -0.35503442 | 0.488338194 | 0.3936955 | 0.00000000 | Inf |
| APL_AAAGGTAGTCTGTGAT-1 | APL_AAAGGTAGTCTGTGAT-1 | 0.7424053 | 0.11557090 | 1.17950494 | 1.063934039 | 0.3808248 | 0.16296787 | 2.336809 |
| APL_AAAGGTATCCTGTAGA-1 | APL_AAAGGTATCCTGTAGA-1 | 0.6932624 | -0.07080545 | -0.07856377 | 0.007758317 | 0.2455509 | 0.02936115 | 8.363123 |
| APL_AAAGTGAGTTTCCATT-1 | APL_AAAGTGAGTTTCCATT-1 | 0.5952930 | -0.08766026 | -0.35503442 | 0.267374160 | 0.2333175 | 0.00000000 | Inf |
dat$inianno <- rep("Unassigned", length(rc.integrated))
dat$anno <- rep("Unassigned", length(rc.integrated))
#dat$anno[which((dat$Signature1 >0) & (dat$Signature1 > dat$Signature2))] = "S1"
#dat$anno[which((dat$Signature2 >0) & (dat$Signature2 > dat$Signature1))] = "S2"
dat$inianno[which((dat$Signature1 > dat$Signature2))] = "S1"
dat$inianno[which((dat$Signature2 > dat$Signature1))] = "S2"
dat$anno[(dat$inianno == "S1")][(order(dat$Signature1[which(dat$inianno == "S1")]) < nrow(dat)/10)] = "S1"
dat$anno[(dat$inianno == "S2")][(order(dat$Signature2[which(dat$inianno == "S2")]) < nrow(dat)/10)] = "S2"
#dat$anno[which((dat$Signature1 >0) & (dat$Signature1 > dat$Signature2))] = "S1"
#dat$anno[which((dat$Signature2 >0) & (dat$Signature2 > dat$Signature1))] = "S2"
#dat$anno[which((dat$Signature1 > dat$Signature2))] = "S1"
#dat$anno[which((dat$Signature2 > dat$Signature1))] = "S2"
#dat$anno[which((dat$Signature1 > dat$Signature2) & (dat$Signature1 > as.numeric(quantile(dat$Signature1, 0.9))))] = "S1"
#dat$anno[which((dat$Signature2 > dat$Signature1) & (dat$Signature2 > as.numeric(quantile(dat$Signature2, 0.9))))] = "S2"
#dat$anno[which((dat$Signature1 > as.numeric(quantile(dat$Signature1, 0.9))))] = "S1"
#dat$anno[which((dat$Signature2 > as.numeric(quantile(dat$Signature2, 0.9))))] = "S2"
#dat$anno[which((dat$Signature1 > as.numeric(quantile(dat$Signature1, 0.9))) & (dat$Signature2 > as.numeric(quantile(dat$Signature2, 0.9))))] = "Unassigned"
head(dat)
| cell_id | tricyclePosition | Signature1 | Signature2 | diff | Signature1_scaled | Signature2_scaled | ratio | inianno | anno | |
|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <chr> | <chr> | |
| APL_AAACCCACAGTTGAAA-1 | APL_AAACCCACAGTTGAAA-1 | 0.5938682 | 0.36621714 | -0.35503442 | 0.721251552 | 0.5627465 | 0.00000000 | Inf | S1 | Unassigned |
| APL_AAACCCATCTGCGAGC-1 | APL_AAACCCATCTGCGAGC-1 | 0.6420312 | 0.28601250 | 0.29078681 | 0.004774306 | 0.5045331 | 0.06858613 | 7.356197 | S2 | Unassigned |
| APL_AAAGAACTCGAGAAGC-1 | APL_AAAGAACTCGAGAAGC-1 | 0.6490424 | 0.13330378 | -0.35503442 | 0.488338194 | 0.3936955 | 0.00000000 | Inf | S1 | Unassigned |
| APL_AAAGGTAGTCTGTGAT-1 | APL_AAAGGTAGTCTGTGAT-1 | 0.7424053 | 0.11557090 | 1.17950494 | 1.063934039 | 0.3808248 | 0.16296787 | 2.336809 | S2 | Unassigned |
| APL_AAAGGTATCCTGTAGA-1 | APL_AAAGGTATCCTGTAGA-1 | 0.6932624 | -0.07080545 | -0.07856377 | 0.007758317 | 0.2455509 | 0.02936115 | 8.363123 | S1 | Unassigned |
| APL_AAAGTGAGTTTCCATT-1 | APL_AAAGTGAGTTTCCATT-1 | 0.5952930 | -0.08766026 | -0.35503442 | 0.267374160 | 0.2333175 | 0.00000000 | Inf | S1 | Unassigned |
table(dat$anno)
S1 S2 Unassigned
117 117 943
head(dat)
| cell_id | tricyclePosition | Signature1 | Signature2 | diff | Signature1_scaled | Signature2_scaled | ratio | inianno | anno | |
|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <chr> | <chr> | |
| APL_AAACCCACAGTTGAAA-1 | APL_AAACCCACAGTTGAAA-1 | 0.5938682 | 0.36621714 | -0.35503442 | 0.721251552 | 0.5627465 | 0.00000000 | Inf | S1 | Unassigned |
| APL_AAACCCATCTGCGAGC-1 | APL_AAACCCATCTGCGAGC-1 | 0.6420312 | 0.28601250 | 0.29078681 | 0.004774306 | 0.5045331 | 0.06858613 | 7.356197 | S2 | Unassigned |
| APL_AAAGAACTCGAGAAGC-1 | APL_AAAGAACTCGAGAAGC-1 | 0.6490424 | 0.13330378 | -0.35503442 | 0.488338194 | 0.3936955 | 0.00000000 | Inf | S1 | Unassigned |
| APL_AAAGGTAGTCTGTGAT-1 | APL_AAAGGTAGTCTGTGAT-1 | 0.7424053 | 0.11557090 | 1.17950494 | 1.063934039 | 0.3808248 | 0.16296787 | 2.336809 | S2 | Unassigned |
| APL_AAAGGTATCCTGTAGA-1 | APL_AAAGGTATCCTGTAGA-1 | 0.6932624 | -0.07080545 | -0.07856377 | 0.007758317 | 0.2455509 | 0.02936115 | 8.363123 | S1 | Unassigned |
| APL_AAAGTGAGTTTCCATT-1 | APL_AAAGTGAGTTTCCATT-1 | 0.5952930 | -0.08766026 | -0.35503442 | 0.267374160 | 0.2333175 | 0.00000000 | Inf | S1 | Unassigned |
nrow(dat)*0.2
options(repr.plot.width = 6, repr.plot.height = 6)
hist(dat$tricyclePosition, breaks=100)
dat %>%
ggplot( aes(x=tricyclePosition, fill=anno)) +
geom_histogram( color="#e9ecef", alpha=0.6, position = 'identity') +
scale_fill_manual(values=c("#69b3a2", "#404080","#cccccc")) +
#theme_ipsum() +
labs(fill="")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
table(dat$anno)
S1 S2 Unassigned
117 117 943
rc.re <- CreateSeuratObject(rc.sub@assays$RNA@data)
rc.re$upper_lower_anno <- dat$anno
rc.re <- subset(rc.re, cells=colnames(rc.re)[rc.re$upper_lower_anno !="Unassigned"])
Idents(rc.re) <- rc.re$upper_lower_anno
Clust_Markers <- FindAllMarkers(rc.re, only.pos = TRUE)
Calculating cluster S1 Calculating cluster S2
Clust_Markers <- Clust_Markers %>% filter(p_val <0.01) %>% arrange(p_val_adj)
Clust_Markers
| p_val | avg_log2FC | pct.1 | pct.2 | p_val_adj | cluster | gene | |
|---|---|---|---|---|---|---|---|
| <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <fct> | <chr> | |
| AT5G05690 | 9.510987e-27 | 1.0177259 | 0.889 | 0.171 | 1.654531e-22 | S2 | AT5G05690 |
| AT1G79430 | 4.910976e-15 | 1.8303819 | 0.752 | 0.333 | 8.543135e-11 | S1 | AT1G79430 |
| AT1G62045 | 2.394913e-13 | 2.3124610 | 0.487 | 0.077 | 4.166191e-09 | S1 | AT1G62045 |
| AT1G78040 | 8.201470e-13 | 1.0337960 | 1.000 | 0.991 | 1.426728e-08 | S1 | AT1G78040 |
| AT4G13600 | 8.346298e-13 | 1.4472378 | 0.436 | 0.043 | 1.451922e-08 | S1 | AT4G13600 |
| AT3G57150 | 1.068495e-12 | 0.8629181 | 0.821 | 0.444 | 1.858754e-08 | S2 | AT3G57150 |
| AT5G40630 | 1.317009e-12 | 0.5834987 | 0.607 | 0.162 | 2.291069e-08 | S2 | AT5G40630 |
| AT3G17730 | 1.459630e-12 | 2.0465267 | 0.402 | 0.026 | 2.539172e-08 | S1 | AT3G17730 |
| AT2G44120 | 2.968049e-12 | 0.6866236 | 0.974 | 0.957 | 5.163219e-08 | S2 | AT2G44120 |
| AT5G11100 | 3.882707e-12 | 0.6616619 | 0.547 | 0.128 | 6.754356e-08 | S1 | AT5G11100 |
| AT1G67430 | 4.389210e-12 | 0.4341335 | 1.000 | 1.000 | 7.635470e-08 | S2 | AT1G67430 |
| AT5G54660 | 4.602154e-12 | 1.1839677 | 0.547 | 0.154 | 8.005908e-08 | S1 | AT5G54660 |
| AT2G27030 | 4.698602e-12 | 0.9914067 | 1.000 | 0.974 | 8.173688e-08 | S1 | AT2G27030 |
| AT5G50720 | 4.788652e-12 | 1.5725179 | 0.709 | 0.274 | 8.330339e-08 | S1 | AT5G50720 |
| AT3G18670 | 6.497293e-12 | 0.8584067 | 0.385 | 0.026 | 1.130269e-07 | S1 | AT3G18670 |
| AT1G69970 | 7.611523e-12 | 1.8965612 | 0.538 | 0.145 | 1.324101e-07 | S1 | AT1G69970 |
| AT3G23830 | 8.800328e-12 | 0.8041325 | 0.846 | 0.513 | 1.530905e-07 | S2 | AT3G23830 |
| AT3G53620 | 1.619160e-11 | 1.3254813 | 0.846 | 0.726 | 2.816690e-07 | S1 | AT3G53620 |
| AT3G15357 | 2.592391e-11 | 0.6022229 | 0.795 | 0.350 | 4.509723e-07 | S2 | AT3G15357 |
| AT1G08500 | 3.034708e-11 | 0.8530849 | 0.393 | 0.043 | 5.279178e-07 | S1 | AT1G08500 |
| AT4G13850 | 3.286909e-11 | 0.6208412 | 0.915 | 0.615 | 5.717907e-07 | S2 | AT4G13850 |
| AT1G54330 | 3.567361e-11 | 1.4640766 | 0.419 | 0.060 | 6.205781e-07 | S1 | AT1G54330 |
| AT1G65910 | 3.925331e-11 | 1.2814174 | 0.376 | 0.034 | 6.828505e-07 | S1 | AT1G65910 |
| AT2G19460 | 3.938409e-11 | 1.5950227 | 0.632 | 0.248 | 6.851256e-07 | S1 | AT2G19460 |
| AT2G18380 | 4.339806e-11 | 2.3768423 | 0.453 | 0.094 | 7.549527e-07 | S1 | AT2G18380 |
| AT1G27920 | 4.619798e-11 | 0.7066776 | 0.350 | 0.017 | 8.036600e-07 | S1 | AT1G27920 |
| AT1G28395 | 4.927537e-11 | 0.5347208 | 0.855 | 0.496 | 8.571944e-07 | S2 | AT1G28395 |
| AT2G41430 | 5.176302e-11 | 1.0864806 | 0.991 | 0.991 | 9.004696e-07 | S1 | AT2G41430 |
| AT1G26880 | 5.487088e-11 | 0.4485385 | 1.000 | 1.000 | 9.545338e-07 | S2 | AT1G26880 |
| AT2G27510 | 6.605192e-11 | 0.6633250 | 0.983 | 0.872 | 1.149039e-06 | S2 | AT2G27510 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| AT1G68560 | 0.0004648052 | 0.4427290 | 0.359 | 0.162 | 1 | S2 | AT1G68560 |
| AT5G46430 | 0.0004691934 | 0.2695621 | 0.991 | 1.000 | 1 | S2 | AT5G46430 |
| AT5G15520 | 0.0004756162 | 0.2720809 | 0.974 | 0.923 | 1 | S2 | AT5G15520 |
| AT5G35360 | 0.0004849552 | 0.2782917 | 0.923 | 0.692 | 1 | S2 | AT5G35360 |
| AT1G30580 | 0.0005436506 | 0.3074841 | 0.966 | 0.863 | 1 | S2 | AT1G30580 |
| AT4G38740 | 0.0005536737 | 0.3112848 | 1.000 | 0.957 | 1 | S2 | AT4G38740 |
| AT4G26190 | 0.0005687662 | 0.2670946 | 0.949 | 0.744 | 1 | S2 | AT4G26190 |
| AT2G31410 | 0.0005925179 | 0.3005031 | 0.872 | 0.812 | 1 | S2 | AT2G31410 |
| AT1G69700 | 0.0007149404 | 0.3430698 | 0.778 | 0.564 | 1 | S2 | AT1G69700 |
| AT1G54050 | 0.0007966367 | 0.3250716 | 0.453 | 0.239 | 1 | S2 | AT1G54050 |
| AT2G01140 | 0.0010155677 | 0.3216506 | 0.974 | 0.863 | 1 | S2 | AT2G01140 |
| AT3G06355 | 0.0011133054 | 0.9501064 | 0.974 | 0.983 | 1 | S2 | AT3G06355 |
| AT1G07370 | 0.0011283879 | 0.2879332 | 0.744 | 0.470 | 1 | S2 | AT1G07370 |
| AT3G54470 | 0.0011510533 | 0.2698479 | 0.923 | 0.752 | 1 | S2 | AT3G54470 |
| AT2G28790 | 0.0012193291 | 0.4119428 | 0.949 | 0.872 | 1 | S2 | AT2G28790 |
| AT2G28000 | 0.0012580871 | 0.2603742 | 0.701 | 0.453 | 1 | S2 | AT2G28000 |
| AT4G39260 | 0.0012618168 | 0.3356570 | 0.991 | 0.940 | 1 | S2 | AT4G39260 |
| AT4G34870 | 0.0012663596 | 0.2804306 | 1.000 | 1.000 | 1 | S2 | AT4G34870 |
| AT5G47320 | 0.0013608532 | 0.2577975 | 0.906 | 0.735 | 1 | S2 | AT5G47320 |
| AT1G28290 | 0.0013639165 | 0.2591225 | 0.453 | 0.274 | 1 | S2 | AT1G28290 |
| AT5G20020 | 0.0013681568 | 0.2625871 | 1.000 | 1.000 | 1 | S2 | AT5G20020 |
| AT4G01100 | 0.0013958082 | 0.2810030 | 0.932 | 0.761 | 1 | S2 | AT4G01100 |
| AT5G10390 | 0.0014667215 | 0.4030072 | 0.615 | 0.350 | 1 | S2 | AT5G10390 |
| AT5G52240 | 0.0016375625 | 0.2656858 | 0.889 | 0.658 | 1 | S2 | AT5G52240 |
| AT4G39800 | 0.0032872168 | 0.3432616 | 0.709 | 0.504 | 1 | S2 | AT4G39800 |
| AT1G42960 | 0.0044464039 | 0.2652567 | 0.957 | 0.889 | 1 | S2 | AT1G42960 |
| AT4G09030 | 0.0044926863 | 0.2581813 | 0.974 | 0.915 | 1 | S2 | AT4G09030 |
| AT2G29570 | 0.0047618069 | 0.3217550 | 0.726 | 0.590 | 1 | S2 | AT2G29570 |
| AT2G23120 | 0.0054608406 | 0.2717179 | 0.803 | 0.581 | 1 | S2 | AT2G23120 |
| AT3G46940 | 0.0058290014 | 0.2915439 | 0.684 | 0.453 | 1 | S2 | AT3G46940 |
write.csv(Clust_Markers, "./tradeseq/Phloem_Atlas_G1_Upper_Lower_cells_DE_20240503.csv")
## Prepare gene module list
module_sel <- select(Clust_Markers, gene, cluster)
module_list <- split(module_sel, f=module_sel$cluster)
#this makes list from long df of gene lists - TARGET is what we want to keep
module_list <- lapply(module_list, function(x) x[names(x)=="gene"])
# convert each sublist into character and eliminate duplicates
module_list <- lapply(module_list, function(x) as.character(unique(x$gene)))
cluster_GO <- gost(module_list, organism = "athaliana", correction_method = "fdr", significant = F, multi_query = F)
cluster_GO_df <- cluster_GO[[1]]
cluster_GO_sig <- filter(cluster_GO_df, p_value<=0.01)
# top terms for each cluster
cluster_GO_sig %>%
filter(source=="GO:BP", intersection_size>=4) %>%
group_by(query) %>%
top_n(5, wt = -p_value) %>%
arrange(desc(p_value)) -> top_GO
GO_n <- cluster_GO_sig %>%
filter(source=="GO:BP", intersection_size>=4) %>%
group_by(term_id) %>%
tally() %>%
arrange(desc(n))
GO_n <- dplyr::rename(GO_n, "n_clusters"=n)
cluster_GO_sig_n <- left_join(cluster_GO_sig, GO_n)
# get all terms for the top ones so that all clusters have values
top_GO_all <- filter(cluster_GO_df, term_id %in% top_GO$term_id)
#spread and plot
top_GO_sel <- select(top_GO_all, query, p_value, term_id, term_name)
spread_GO <- spread(top_GO_sel, key = query, p_value)
spread_GO[is.na(spread_GO)] <- 1
spread_GO_m <- as.matrix(-log10(spread_GO[3:ncol(spread_GO)]))
rownames(spread_GO_m) <- spread_GO$term_name
Joining with `by = join_by(term_id)`
head(cluster_GO_sig)
| query | significant | p_value | term_size | query_size | intersection_size | precision | recall | term_id | source | term_name | effective_domain_size | source_order | parents | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <chr> | <lgl> | <dbl> | <int> | <int> | <int> | <dbl> | <dbl> | <chr> | <chr> | <chr> | <int> | <int> | <list> | |
| 1 | S1 | TRUE | 3.958123e-31 | 436 | 603 | 71 | 0.1177446 | 0.16284404 | GO:0016192 | GO:BP | vesicle-mediated transport | 21899 | 5229 | GO:00068.... |
| 2 | S1 | TRUE | 2.297663e-23 | 752 | 603 | 81 | 0.1343284 | 0.10771277 | GO:0045184 | GO:BP | establishment of protein localization | 21899 | 11702 | GO:00081.... |
| 3 | S1 | TRUE | 8.723130e-23 | 976 | 603 | 92 | 0.1525705 | 0.09426230 | GO:0051641 | GO:BP | cellular localization | 21899 | 14309 | GO:00099.... |
| 4 | S1 | TRUE | 1.713723e-22 | 712 | 603 | 77 | 0.1276949 | 0.10814607 | GO:0015031 | GO:BP | protein transport | 21899 | 4854 | GO:00451.... |
| 5 | S1 | TRUE | 3.336778e-22 | 812 | 603 | 82 | 0.1359867 | 0.10098522 | GO:0008104 | GO:BP | protein localization | 21899 | 3157 | GO:0070727 |
| 6 | S1 | TRUE | 6.497090e-22 | 841 | 603 | 83 | 0.1376451 | 0.09869203 | GO:0070727 | GO:BP | cellular macromolecule localization | 21899 | 16772 | GO:00330.... |
write.csv(cluster_GO_sig[,-14], "./tradeseq/Phloem_Atlas_G1_Upper_Lower_cells_GO_20240503.csv")
cluster_GO_sig <- read.csv("./tradeseq/Phloem_Atlas_G1_Upper_Lower_cells_GO_20240503_selected.csv")
cluster_GO_sig <- cluster_GO_sig %>% filter(include=="y")
cluster_GO_sig
| X | query | significant | p_value | term_size | query_size | intersection_size | precision | recall | term_id | source | term_name | effective_domain_size | source_order | include |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <int> | <chr> | <lgl> | <dbl> | <int> | <int> | <int> | <dbl> | <dbl> | <chr> | <chr> | <chr> | <int> | <int> | <chr> |
| 1 | S1 | TRUE | 3.960000e-31 | 436 | 603 | 71 | 0.117744610 | 0.16284404 | GO:0016192 | GO:BP | vesicle-mediated transport | 21899 | 5229 | y |
| 5 | S1 | TRUE | 3.340000e-22 | 812 | 603 | 82 | 0.135986733 | 0.10098522 | GO:0008104 | GO:BP | protein localization | 21899 | 3157 | y |
| 14 | S1 | TRUE | 1.740000e-11 | 457 | 603 | 45 | 0.074626866 | 0.09846827 | GO:0051649 | GO:BP | establishment of localization in cell | 21899 | 14317 | y |
| 47 | S1 | TRUE | 1.636065e-03 | 421 | 603 | 27 | 0.044776119 | 0.06413302 | GO:0016049 | GO:BP | cell growth | 21899 | 5141 | y |
| 48 | S1 | TRUE | 1.656147e-03 | 693 | 603 | 38 | 0.063018242 | 0.05483405 | GO:0071554 | GO:BP | cell wall organization or biogenesis | 21899 | 17260 | y |
| 222 | S2 | TRUE | 7.940000e-72 | 1403 | 419 | 152 | 0.362768496 | 0.10833927 | GO:0006412 | GO:BP | translation | 21899 | 2194 | y |
| 227 | S2 | TRUE | 4.120000e-63 | 5270 | 419 | 264 | 0.630071599 | 0.05009488 | GO:0010467 | GO:BP | gene expression | 21899 | 4207 | y |
| 233 | S2 | TRUE | 3.690000e-57 | 7313 | 419 | 301 | 0.718377088 | 0.04115958 | GO:0009058 | GO:BP | biosynthetic process | 21899 | 3271 | y |
| 257 | S2 | TRUE | 2.960000e-08 | 152 | 419 | 18 | 0.042959427 | 0.11842105 | GO:0006457 | GO:BP | protein folding | 21899 | 2227 | y |
| 260 | S2 | TRUE | 4.470000e-06 | 4 | 419 | 4 | 0.009546539 | 1.00000000 | GO:0006858 | GO:BP | extracellular transport | 21899 | 2551 | y |
cluster_GO_sig %>%
filter(source=="GO:BP", intersection_size>=4) %>%
group_by(query) %>%
top_n(10, wt = -p_value) %>%
arrange(desc(p_value)) -> top_GO
GO_n <- cluster_GO_sig %>%
filter(source=="GO:BP", intersection_size>=4) %>%
group_by(term_id) %>%
tally() %>%
arrange(desc(n))
GO_n <- dplyr::rename(GO_n, "n_clusters"=n)
cluster_GO_sig_n <- left_join(cluster_GO_sig, GO_n)
# get all terms for the top ones so that all clusters have values
top_GO_all <- filter(cluster_GO_df, term_id %in% top_GO$term_id)
#spread and plot
top_GO_sel <- select(top_GO_all, query, p_value, term_id, term_name)
spread_GO <- spread(top_GO_sel, key = query, p_value)
spread_GO[is.na(spread_GO)] <- 1
spread_GO_m <- as.matrix(-log10(spread_GO[3:ncol(spread_GO)]))
rownames(spread_GO_m) <- spread_GO$term_name
Joining with `by = join_by(term_id)`
spread_GO_m
| S1 | S2 | |
|---|---|---|
| translation | 0.000000 | 71.099944 |
| protein folding | 3.106984 | 7.528682 |
| extracellular transport | 0.000000 | 5.350108 |
| protein localization | 21.476673 | 0.000000 |
| biosynthetic process | 0.000000 | 56.432419 |
| gene expression | 0.000000 | 62.385030 |
| cell growth | 2.786199 | 0.000000 |
| vesicle-mediated transport | 30.402511 | 0.000000 |
| establishment of localization in cell | 10.760539 | 0.000000 |
| cell wall organization or biogenesis | 2.780901 | 0.000000 |
(spread_GO_m <- spread_GO_m[-2,])
| S1 | S2 | |
|---|---|---|
| translation | 0.000000 | 71.099944 |
| extracellular transport | 0.000000 | 5.350108 |
| protein localization | 21.476673 | 0.000000 |
| biosynthetic process | 0.000000 | 56.432419 |
| gene expression | 0.000000 | 62.385030 |
| cell growth | 2.786199 | 0.000000 |
| vesicle-mediated transport | 30.402511 | 0.000000 |
| establishment of localization in cell | 10.760539 | 0.000000 |
| cell wall organization or biogenesis | 2.780901 | 0.000000 |
options(repr.plot.width = 9, repr.plot.height = 6)
GO_hm <- Heatmap(spread_GO_m,
name = "-log10_pval",
heatmap_legend_param = list(title_position="topcenter", color_bar = "continuous",title_gp = grid::gpar(fontsize = 12)),
col = colorRamp2(c(0, 5),
c("beige", "#e31a1c")),
cluster_rows = T,
cluster_columns = F,
use_raster= FALSE,
show_column_names = TRUE,
show_row_names = TRUE,
show_row_dend = TRUE,
show_column_dend = TRUE,
column_title_gp = grid::gpar(fontsize = 18),
column_names_gp = grid::gpar(fontsize = 18),
clustering_distance_rows = "pearson",
clustering_distance_columns = "pearson",
row_names_gp = gpar(fontsize = 18))
# padding - bottom, left, top, right
draw(GO_hm, padding = unit(c(1, 10, 1, 100), "mm"), heatmap_legend_side = "left")
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_G1_Upper_Lower_cells_GO_20240503.pdf",width=9, height=6)
draw(GO_hm, padding = unit(c(1, 10, 1, 100), "mm"), heatmap_legend_side = "left")
dev.off()
GL3 <- read.csv("./tradeseq/BR_bsn_genes.csv")
GL3
| Name | AGI |
|---|---|
| <chr> | <chr> |
| DWF4 | AT3G50660 |
| CPD | AT5G05690 |
| DET2 | AT2G38050 |
| ROT3 | AT4G36380 |
| CYP90D1 | AT3G13730 |
| BR6OX1 | AT5G38970 |
| BR6OX2 | AT3G30180 |
rc.sub <- subset(rc.sub,cells=dat$cell_id)
rc.sub$anno <- dat$anno
# BAS1 catabolic gene
geneid <- 'AT2G26710'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))
toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))
# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()
options(repr.plot.width=3, repr.plot.height=5)
# Change dotsize and stack ratio
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()+theme_classic()+
geom_dotplot(binaxis='y', stackdir='center',
stackratio=1, dotsize=0.8)+
theme(text = element_text(size=20),
axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
axis.title.y = element_blank()) + ggtitle("BAS1")
Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_G1_Upper_Lower_cells_BAS1_Exp.pdf",width=3, height=4)
print(ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()+theme_classic()+
geom_dotplot(binaxis='y', stackdir='center',
stackratio=1, dotsize=0.8)+
theme(text = element_text(size=20),
axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
axis.title.y = element_blank()) + ggtitle("BAS1"))
dev.off()
Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
# SOB7 catabolic gene
geneid <- 'AT1G17060'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))
toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))
# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()
# BEN1 catabolic gene
geneid <- 'AT2G45400'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))
toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))
# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()
#OPS
geneid <- 'AT3G09070'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))
toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))
# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()
options(repr.plot.width=3, repr.plot.height=5)
# Change dotsize and stack ratio
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()+theme_classic()+
geom_dotplot(binaxis='y', stackdir='center',
stackratio=1, dotsize=0.1)+
theme(text = element_text(size=20),
axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
axis.title.y = element_blank()) + ggtitle("OPS")
Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
#OPL2
geneid <- 'AT2G38070'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))
toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))
# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()
# CSI1
geneid <- 'AT2G22125'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))
toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))
# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()
options(repr.plot.width=3, repr.plot.height=5)
# Change dotsize and stack ratio
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()+theme_classic()+
#geom_dotplot(binaxis='y', stackdir='center',
# stackratio=1, dotsize=0.1)+
theme(text = element_text(size=20),
axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
axis.title.y = element_blank()) + ggtitle("CSI1")
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_G1_Upper_Lower_cells_CSI1_Exp.pdf",width=3, height=4)
print(ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()+theme_classic()+
#geom_dotplot(binaxis='y', stackdir='center',
# stackratio=1, dotsize=0.8)+
theme(text = element_text(size=20),
axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
axis.title.y = element_blank()) + ggtitle("CSI1"))
dev.off()
# CESA6
geneid <- 'AT5G64740'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))
toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))
# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()
options(repr.plot.width=3, repr.plot.height=5)
# Change dotsize and stack ratio
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()+theme_classic()+
#geom_dotplot(binaxis='y', stackdir='center',
# stackratio=1, dotsize=0.2)+
theme(text = element_text(size=20),
axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
axis.title.y = element_blank()) + ggtitle("CESA6")
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_G1_Upper_Lower_cells_CESA6_Exp.pdf",width=3, height=4)
print(ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()+theme_classic()+
#geom_dotplot(binaxis='y', stackdir='center',
# stackratio=1, dotsize=0.8)+
theme(text = element_text(size=20),
axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
axis.title.y = element_blank()) + ggtitle("CESA6"))
dev.off()
# DET2
geneid <- 'AT2G38050'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))
toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))
# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()
# ROT3
geneid <- 'AT4G36380'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))
toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))
# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()
# CYP90D1
geneid <- 'AT3G13730'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))
toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))
# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()
# BR6OX1
geneid <- 'AT5G38970'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))
toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))
# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()
# BR6OX2
geneid <- 'AT3G30180'
## Percentage expressed
length(which(rc.integrated$RNA@data[geneid,]>0))/410
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))
toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))
# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()
#CPD(AT5G05690)
geneid <- 'AT5G05690'
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))
toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))
# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()
options(repr.plot.width=3, repr.plot.height=5)
# Change dotsize and stack ratio
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()+theme_classic()+
#geom_dotplot(binaxis='y', stackdir='center',
# stackratio=1, dotsize=0.1)+
theme(text = element_text(size=20),
axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
axis.title.y = element_blank()) + ggtitle("CPD")
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_G1_Upper_Lower_cells_CPD_Exp.pdf",width=3, height=4)
print(ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()+theme_classic()+
#geom_dotplot(binaxis='y', stackdir='center',
# stackratio=1, dotsize=0.8)+
theme(text = element_text(size=20),
axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
axis.title.y = element_blank()) + ggtitle("CPD"))
dev.off()
#DWF(AT3G50660)
geneid <- 'AT3G50660'
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))
toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))
# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()
options(repr.plot.width=3, repr.plot.height=5)
# Change dotsize and stack ratio
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()+theme_classic()+
#geom_dotplot(binaxis='y', stackdir='center',
# stackratio=1, dotsize=0.8)+
theme(text = element_text(size=20),
axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
axis.title.y = element_blank()) + ggtitle("DWF4")
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_G1_Upper_Lower_cells_DWF4_Exp.pdf",width=3, height=4)
print(ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()+theme_classic()+
#geom_dotplot(binaxis='y', stackdir='center',
# stackratio=1, dotsize=0.8)+
theme(text = element_text(size=20),
axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
axis.title.y = element_blank()) + ggtitle("DWF4"))
dev.off()
#PHB3(AT3G50660)
geneid <- 'AT5G40770'
toplt <- data.frame(gene_exp = c(rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S1")],rc.sub@assays$RNA@data[geneid,which(rc.sub$anno=="S2")]),
signature_group = c(rep("Upper cells", length(which(rc.sub$anno=="S1"))), rep("Lower cells", length(which(rc.sub$anno=="S2")))))
toplt$signature_group <- factor(toplt$signature_group, levels= c("Upper cells", "Lower cells"))
# Overlaid histograms
options(repr.plot.width=4, repr.plot.height=8)
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()
options(repr.plot.width=3, repr.plot.height=5)
# Change dotsize and stack ratio
ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()+theme_classic()+
#geom_dotplot(binaxis='y', stackdir='center',
# stackratio=1, dotsize=0.8)+
theme(text = element_text(size=20),
axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
axis.title.y = element_blank()) + ggtitle("PHB3")
grDevices::cairo_pdf("./pdfs/Phloem_Atlas_G1_Upper_Lower_cells_PHB3_Exp.pdf",width=3, height=4)
print(ggplot(toplt, aes(x=signature_group, y=gene_exp)) +
geom_boxplot()+theme_classic()+
#geom_dotplot(binaxis='y', stackdir='center',
# stackratio=1, dotsize=0.8)+
theme(text = element_text(size=20),
axis.text.x = element_text(angle=45, hjust=1), axis.title.x = element_blank(),
axis.title.y = element_blank()) + ggtitle("PHB3"))
dev.off()